From 2dec7636a084b7b74052e6516d382af90dc60470 Mon Sep 17 00:00:00 2001 From: joboet Date: Fri, 21 Mar 2025 19:42:17 +0100 Subject: [PATCH 001/358] core: simplify `Extend` for tuples This is an alternative to #137400. The current macro is incredibly complicated and introduces subtle bugs like calling the `extend_one` of the individual collections in backwards order. This PR drastically simplifies the macro by removing recursion and moving the specialization out of the macro. It also fixes the ordering issue described above (I've stolen the test of the new behaviour from #137400). Additionally, the 1-tuple is now special-cased to allow taking advantage of the well-optimized `Extend` implementations of the individual collection. --- core/src/iter/traits/collect.rs | 432 +++++++++++++++++--------------- 1 file changed, 236 insertions(+), 196 deletions(-) diff --git a/core/src/iter/traits/collect.rs b/core/src/iter/traits/collect.rs index 97bb21c8a36e8..b4ab81d15a330 100644 --- a/core/src/iter/traits/collect.rs +++ b/core/src/iter/traits/collect.rs @@ -459,234 +459,274 @@ impl Extend<()> for () { fn extend_one(&mut self, _item: ()) {} } -macro_rules! spec_tuple_impl { - ( - ( - $ty_name:ident, $var_name:ident, $extend_ty_name: ident, - $trait_name:ident, $default_fn_name:ident, $cnt:tt - ), - ) => { - spec_tuple_impl!( - $trait_name, - $default_fn_name, - #[doc(fake_variadic)] - #[doc = "This trait is implemented for tuples up to twelve items long. The `impl`s for \ - 1- and 3- through 12-ary tuples were stabilized after 2-tuples, in \ - 1.85.0."] - => ($ty_name, $var_name, $extend_ty_name, $cnt), - ); - }; - ( - ( - $ty_name:ident, $var_name:ident, $extend_ty_name: ident, - $trait_name:ident, $default_fn_name:ident, $cnt:tt - ), - $( - ( - $ty_names:ident, $var_names:ident, $extend_ty_names:ident, - $trait_names:ident, $default_fn_names:ident, $cnts:tt - ), - )* - ) => { - spec_tuple_impl!( - $( - ( - $ty_names, $var_names, $extend_ty_names, - $trait_names, $default_fn_names, $cnts - ), - )* - ); - spec_tuple_impl!( - $trait_name, - $default_fn_name, - #[doc(hidden)] - => ( - $ty_name, $var_name, $extend_ty_name, $cnt - ), - $( - ( - $ty_names, $var_names, $extend_ty_names, $cnts - ), - )* - ); - }; - ( - $trait_name:ident, $default_fn_name:ident, #[$meta:meta] - $(#[$doctext:meta])? => $( - ( - $ty_names:ident, $var_names:ident, $extend_ty_names:ident, $cnts:tt - ), - )* - ) => { - #[$meta] - $(#[$doctext])? - #[stable(feature = "extend_for_tuple", since = "1.56.0")] - impl<$($ty_names,)* $($extend_ty_names,)*> Extend<($($ty_names,)*)> for ($($extend_ty_names,)*) - where - $($extend_ty_names: Extend<$ty_names>,)* - { - /// Allows to `extend` a tuple of collections that also implement `Extend`. 
- /// - /// See also: [`Iterator::unzip`] - /// - /// # Examples - /// ``` - /// // Example given for a 2-tuple, but 1- through 12-tuples are supported - /// let mut tuple = (vec![0], vec![1]); - /// tuple.extend([(2, 3), (4, 5), (6, 7)]); - /// assert_eq!(tuple.0, [0, 2, 4, 6]); - /// assert_eq!(tuple.1, [1, 3, 5, 7]); - /// - /// // also allows for arbitrarily nested tuples as elements - /// let mut nested_tuple = (vec![1], (vec![2], vec![3])); - /// nested_tuple.extend([(4, (5, 6)), (7, (8, 9))]); - /// - /// let (a, (b, c)) = nested_tuple; - /// assert_eq!(a, [1, 4, 7]); - /// assert_eq!(b, [2, 5, 8]); - /// assert_eq!(c, [3, 6, 9]); - /// ``` - fn extend>(&mut self, into_iter: T) { - let ($($var_names,)*) = self; - let iter = into_iter.into_iter(); - $trait_name::extend(iter, $($var_names,)*); - } +/// This trait is implemented for tuples up to twelve items long. The `impl`s for +/// 1- and 3- through 12-ary tuples were stabilized after 2-tuples, in 1.85.0. +#[doc(fake_variadic)] // the other implementations are below. +#[stable(feature = "extend_for_tuple", since = "1.56.0")] +impl Extend<(T,)> for (ExtendT,) +where + ExtendT: Extend, +{ + /// Allows to `extend` a tuple of collections that also implement `Extend`. + /// + /// See also: [`Iterator::unzip`] + /// + /// # Examples + /// ``` + /// // Example given for a 2-tuple, but 1- through 12-tuples are supported + /// let mut tuple = (vec![0], vec![1]); + /// tuple.extend([(2, 3), (4, 5), (6, 7)]); + /// assert_eq!(tuple.0, [0, 2, 4, 6]); + /// assert_eq!(tuple.1, [1, 3, 5, 7]); + /// + /// // also allows for arbitrarily nested tuples as elements + /// let mut nested_tuple = (vec![1], (vec![2], vec![3])); + /// nested_tuple.extend([(4, (5, 6)), (7, (8, 9))]); + /// + /// let (a, (b, c)) = nested_tuple; + /// assert_eq!(a, [1, 4, 7]); + /// assert_eq!(b, [2, 5, 8]); + /// assert_eq!(c, [3, 6, 9]); + /// ``` + fn extend>(&mut self, iter: I) { + self.0.extend(iter.into_iter().map(|t| t.0)); + } - fn extend_one(&mut self, item: ($($ty_names,)*)) { - $(self.$cnts.extend_one(item.$cnts);)* - } + fn extend_one(&mut self, item: (T,)) { + self.0.extend_one(item.0) + } - fn extend_reserve(&mut self, additional: usize) { - $(self.$cnts.extend_reserve(additional);)* - } + fn extend_reserve(&mut self, additional: usize) { + self.0.extend_reserve(additional) + } - unsafe fn extend_one_unchecked(&mut self, item: ($($ty_names,)*)) { - // SAFETY: Those are our safety preconditions, and we correctly forward `extend_reserve`. - unsafe { - $(self.$cnts.extend_one_unchecked(item.$cnts);)* - } - } - } + unsafe fn extend_one_unchecked(&mut self, item: (T,)) { + // SAFETY: the caller guarantees all preconditions. + unsafe { self.0.extend_one_unchecked(item.0) } + } +} - trait $trait_name<$($ty_names),*> { - fn extend(self, $($var_names: &mut $ty_names,)*); - } +/// This implementation turns an iterator of tuples into a tuple of types which implement +/// [`Default`] and [`Extend`]. 
+/// +/// This is similar to [`Iterator::unzip`], but is also composable with other [`FromIterator`] +/// implementations: +/// +/// ```rust +/// # fn main() -> Result<(), core::num::ParseIntError> { +/// let string = "1,2,123,4"; +/// +/// // Example given for a 2-tuple, but 1- through 12-tuples are supported +/// let (numbers, lengths): (Vec<_>, Vec<_>) = string +/// .split(',') +/// .map(|s| s.parse().map(|n: u32| (n, s.len()))) +/// .collect::>()?; +/// +/// assert_eq!(numbers, [1, 2, 123, 4]); +/// assert_eq!(lengths, [1, 1, 3, 1]); +/// # Ok(()) } +/// ``` +#[doc(fake_variadic)] // the other implementations are below. +#[stable(feature = "from_iterator_for_tuple", since = "1.79.0")] +impl FromIterator<(T,)> for (ExtendT,) +where + ExtendT: Default + Extend, +{ + fn from_iter>(iter: Iter) -> Self { + let mut res = ExtendT::default(); + res.extend(iter.into_iter().map(|t| t.0)); + (res,) + } +} - fn $default_fn_name<$($ty_names,)* $($extend_ty_names,)*>( - iter: impl Iterator, - $($var_names: &mut $extend_ty_names,)* - ) where - $($extend_ty_names: Extend<$ty_names>,)* - { - fn extend<'a, $($ty_names,)*>( - $($var_names: &'a mut impl Extend<$ty_names>,)* - ) -> impl FnMut((), ($($ty_names,)*)) + 'a { - #[allow(non_snake_case)] - move |(), ($($extend_ty_names,)*)| { - $($var_names.extend_one($extend_ty_names);)* - } - } +/// An implementation of [`extend`](Extend::extend) that calls `extend_one` or +/// `extend_one_unchecked` for each element of the iterator. +fn default_extend(collection: &mut ExtendT, iter: I) +where + ExtendT: Extend, + I: IntoIterator, +{ + // Specialize on `TrustedLen` and call `extend_one_unchecked` where + // applicable. + trait SpecExtend { + fn extend(&mut self, iter: I); + } + // Extracting these to separate functions avoid monomorphising the closures + // for every iterator type. + fn extender(collection: &mut ExtendT) -> impl FnMut(T) + use<'_, ExtendT, T> + where + ExtendT: Extend, + { + move |item| collection.extend_one(item) + } + + unsafe fn unchecked_extender( + collection: &mut ExtendT, + ) -> impl FnMut(T) + use<'_, ExtendT, T> + where + ExtendT: Extend, + { + // SAFETY: we make sure that there is enough space at the callsite of + // this function. + move |item| unsafe { collection.extend_one_unchecked(item) } + } + + impl SpecExtend for ExtendT + where + ExtendT: Extend, + I: Iterator, + { + default fn extend(&mut self, iter: I) { let (lower_bound, _) = iter.size_hint(); if lower_bound > 0 { - $($var_names.extend_reserve(lower_bound);)* + self.extend_reserve(lower_bound); } - iter.fold((), extend($($var_names,)*)); + iter.for_each(extender(self)) } + } - impl<$($ty_names,)* $($extend_ty_names,)* Iter> $trait_name<$($extend_ty_names),*> for Iter - where - $($extend_ty_names: Extend<$ty_names>,)* - Iter: Iterator, - { - default fn extend(self, $($var_names: &mut $extend_ty_names),*) { - $default_fn_name(self, $($var_names),*); + impl SpecExtend for ExtendT + where + ExtendT: Extend, + I: TrustedLen, + { + fn extend(&mut self, iter: I) { + let (lower_bound, upper_bound) = iter.size_hint(); + if lower_bound > 0 { + self.extend_reserve(lower_bound); + } + + if upper_bound.is_none() { + // We cannot reserve more than `usize::MAX` items, and this is likely to go out of memory anyway. + iter.for_each(extender(self)) + } else { + // SAFETY: We reserve enough space for the `size_hint`, and the iterator is + // `TrustedLen` so its `size_hint` is exact. 
+ iter.for_each(unsafe { unchecked_extender(self) }) } } + } - impl<$($ty_names,)* $($extend_ty_names,)* Iter> $trait_name<$($extend_ty_names),*> for Iter + SpecExtend::extend(collection, iter.into_iter()); +} + +// Implements `Extend` and `FromIterator` for tuples with length larger than one. +macro_rules! impl_extend_tuple { + ($(($ty:tt, $extend_ty:tt, $index:tt)),+) => { + #[doc(hidden)] + #[stable(feature = "extend_for_tuple", since = "1.56.0")] + impl<$($ty,)+ $($extend_ty,)+> Extend<($($ty,)+)> for ($($extend_ty,)+) where - $($extend_ty_names: Extend<$ty_names>,)* - Iter: TrustedLen, + $($extend_ty: Extend<$ty>,)+ { - fn extend(self, $($var_names: &mut $extend_ty_names,)*) { - fn extend<'a, $($ty_names,)*>( - $($var_names: &'a mut impl Extend<$ty_names>,)* - ) -> impl FnMut((), ($($ty_names,)*)) + 'a { - #[allow(non_snake_case)] - // SAFETY: We reserve enough space for the `size_hint`, and the iterator is - // `TrustedLen` so its `size_hint` is exact. - move |(), ($($extend_ty_names,)*)| unsafe { - $($var_names.extend_one_unchecked($extend_ty_names);)* - } - } + fn extend>(&mut self, iter: T) { + default_extend(self, iter) + } - let (lower_bound, upper_bound) = self.size_hint(); + fn extend_one(&mut self, item: ($($ty,)+)) { + $(self.$index.extend_one(item.$index);)+ + } - if upper_bound.is_none() { - // We cannot reserve more than `usize::MAX` items, and this is likely to go out of memory anyway. - $default_fn_name(self, $($var_names,)*); - return; - } + fn extend_reserve(&mut self, additional: usize) { + $(self.$index.extend_reserve(additional);)+ + } - if lower_bound > 0 { - $($var_names.extend_reserve(lower_bound);)* + unsafe fn extend_one_unchecked(&mut self, item: ($($ty,)+)) { + // SAFETY: Those are our safety preconditions, and we correctly forward `extend_reserve`. + unsafe { + $(self.$index.extend_one_unchecked(item.$index);)+ } - - self.fold((), extend($($var_names,)*)); } } - /// This implementation turns an iterator of tuples into a tuple of types which implement - /// [`Default`] and [`Extend`]. - /// - /// This is similar to [`Iterator::unzip`], but is also composable with other [`FromIterator`] - /// implementations: - /// - /// ```rust - /// # fn main() -> Result<(), core::num::ParseIntError> { - /// let string = "1,2,123,4"; - /// - /// // Example given for a 2-tuple, but 1- through 12-tuples are supported - /// let (numbers, lengths): (Vec<_>, Vec<_>) = string - /// .split(',') - /// .map(|s| s.parse().map(|n: u32| (n, s.len()))) - /// .collect::>()?; - /// - /// assert_eq!(numbers, [1, 2, 123, 4]); - /// assert_eq!(lengths, [1, 1, 3, 1]); - /// # Ok(()) } - /// ``` - #[$meta] - $(#[$doctext])? 
+ #[doc(hidden)] #[stable(feature = "from_iterator_for_tuple", since = "1.79.0")] - impl<$($ty_names,)* $($extend_ty_names,)*> FromIterator<($($extend_ty_names,)*)> for ($($ty_names,)*) + impl<$($ty,)+ $($extend_ty,)+> FromIterator<($($ty,)+)> for ($($extend_ty,)+) where - $($ty_names: Default + Extend<$extend_ty_names>,)* + $($extend_ty: Default + Extend<$ty>,)+ { - fn from_iter>(iter: Iter) -> Self { - let mut res = <($($ty_names,)*)>::default(); + fn from_iter>(iter: Iter) -> Self { + let mut res = Self::default(); res.extend(iter); - res } } - }; } -spec_tuple_impl!( - (L, l, EL, TraitL, default_extend_tuple_l, 11), - (K, k, EK, TraitK, default_extend_tuple_k, 10), - (J, j, EJ, TraitJ, default_extend_tuple_j, 9), - (I, i, EI, TraitI, default_extend_tuple_i, 8), - (H, h, EH, TraitH, default_extend_tuple_h, 7), - (G, g, EG, TraitG, default_extend_tuple_g, 6), - (F, f, EF, TraitF, default_extend_tuple_f, 5), - (E, e, EE, TraitE, default_extend_tuple_e, 4), - (D, d, ED, TraitD, default_extend_tuple_d, 3), - (C, c, EC, TraitC, default_extend_tuple_c, 2), - (B, b, EB, TraitB, default_extend_tuple_b, 1), - (A, a, EA, TraitA, default_extend_tuple_a, 0), +impl_extend_tuple!((A, ExA, 0), (B, ExB, 1)); +impl_extend_tuple!((A, ExA, 0), (B, ExB, 1), (C, ExC, 2)); +impl_extend_tuple!((A, ExA, 0), (B, ExB, 1), (C, ExC, 2), (D, ExD, 3)); +impl_extend_tuple!((A, ExA, 0), (B, ExB, 1), (C, ExC, 2), (D, ExD, 3), (E, ExE, 4)); +impl_extend_tuple!((A, ExA, 0), (B, ExB, 1), (C, ExC, 2), (D, ExD, 3), (E, ExE, 4), (F, ExF, 5)); +impl_extend_tuple!( + (A, ExA, 0), + (B, ExB, 1), + (C, ExC, 2), + (D, ExD, 3), + (E, ExE, 4), + (F, ExF, 5), + (G, ExG, 6) +); +impl_extend_tuple!( + (A, ExA, 0), + (B, ExB, 1), + (C, ExC, 2), + (D, ExD, 3), + (E, ExE, 4), + (F, ExF, 5), + (G, ExG, 6), + (H, ExH, 7) +); +impl_extend_tuple!( + (A, ExA, 0), + (B, ExB, 1), + (C, ExC, 2), + (D, ExD, 3), + (E, ExE, 4), + (F, ExF, 5), + (G, ExG, 6), + (H, ExH, 7), + (I, ExI, 8) +); +impl_extend_tuple!( + (A, ExA, 0), + (B, ExB, 1), + (C, ExC, 2), + (D, ExD, 3), + (E, ExE, 4), + (F, ExF, 5), + (G, ExG, 6), + (H, ExH, 7), + (I, ExI, 8), + (J, ExJ, 9) +); +impl_extend_tuple!( + (A, ExA, 0), + (B, ExB, 1), + (C, ExC, 2), + (D, ExD, 3), + (E, ExE, 4), + (F, ExF, 5), + (G, ExG, 6), + (H, ExH, 7), + (I, ExI, 8), + (J, ExJ, 9), + (K, ExK, 10) +); +impl_extend_tuple!( + (A, ExA, 0), + (B, ExB, 1), + (C, ExC, 2), + (D, ExD, 3), + (E, ExE, 4), + (F, ExF, 5), + (G, ExG, 6), + (H, ExH, 7), + (I, ExI, 8), + (J, ExJ, 9), + (K, ExK, 10), + (L, ExL, 11) ); From 5d29ad7fae0afc752e47288952ab219f7a4844aa Mon Sep 17 00:00:00 2001 From: Frank Steffahn Date: Sat, 22 Feb 2025 01:09:51 +0100 Subject: [PATCH 002/358] Add tests for `Extend<(T, U)> for (ExtendT, ExtendU)` ordering of side-effects to `coretest`. 
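For context, a minimal self-contained sketch of the interleaving this test pins down (the `Logger` wrapper here is illustrative, not part of the patch): with the fixed behaviour, extending a tuple calls into the left and right collections once per element, in order, instead of draining one side first.

```rust
use std::cell::RefCell;

fn main() {
    let log = RefCell::new(Vec::new());

    // A collection wrapper that records every element it receives.
    struct Logger<'a>(&'a RefCell<Vec<(&'static str, i32)>>, &'static str);
    impl Extend<i32> for Logger<'_> {
        fn extend<I: IntoIterator<Item = i32>>(&mut self, iter: I) {
            for item in iter {
                self.0.borrow_mut().push((self.1, item));
            }
        }
    }

    let mut pair = (Logger(&log, "l"), Logger(&log, "r"));
    pair.extend([(1, 2), (3, 4)]);

    // Side effects alternate l, r, l, r rather than l, l, r, r or the reverse.
    assert_eq!(*log.borrow(), [("l", 1), ("r", 2), ("l", 3), ("r", 4)]);
}
```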
--- coretests/tests/iter/traits/iterator.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/coretests/tests/iter/traits/iterator.rs b/coretests/tests/iter/traits/iterator.rs index e31d2e15b6d7e..5ef1f797ae55d 100644 --- a/coretests/tests/iter/traits/iterator.rs +++ b/coretests/tests/iter/traits/iterator.rs @@ -1,3 +1,5 @@ +use core::cell::RefCell; +use core::iter::zip; use core::num::NonZero; /// A wrapper struct that implements `Eq` and `Ord` based on the wrapped @@ -642,6 +644,26 @@ fn test_collect_for_tuples() { assert!(e.2 == d); } +#[test] +fn test_extend_for_tuple_side_effects_order() { + struct TrackingExtender<'a, T>(&'static str, &'a RefCell)>>, Vec); + impl Extend for TrackingExtender<'_, T> { + fn extend>(&mut self, i: I) { + let items = Vec::from_iter(i); + self.1.borrow_mut().push((self.0, items.clone())); + self.2.extend(items); + } + } + + let effects = RefCell::new(vec![]); + let l = TrackingExtender("l", &effects, vec![]); + let r = TrackingExtender("r", &effects, vec![]); + let mut p = ((l, r), ()); + p.extend(zip([(1, 2), (3, 4)], [(), ()])); + let effects = effects.into_inner(); + assert_eq!(effects, [("l", vec![1]), ("r", vec![2]), ("l", vec![3]), ("r", vec![4])]); +} + // just tests by whether or not this compiles fn _empty_impl_all_auto_traits() { use std::panic::{RefUnwindSafe, UnwindSafe}; From bfa6fa4068b137c09ad7ff6061b8d94b5a1ab23e Mon Sep 17 00:00:00 2001 From: Yotam Ofek Date: Fri, 23 May 2025 13:05:50 +0000 Subject: [PATCH 003/358] Add `FromIterator` impls for `ascii::Char`s to `String`s --- alloc/src/string.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/alloc/src/string.rs b/alloc/src/string.rs index 37614a7ca4571..b383d4d1ae517 100644 --- a/alloc/src/string.rs +++ b/alloc/src/string.rs @@ -2373,6 +2373,28 @@ impl<'a> FromIterator> for String { } } +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "ascii_char", issue = "110998")] +impl FromIterator for String { + fn from_iter>(iter: T) -> Self { + let buf = iter.into_iter().map(core::ascii::Char::to_u8).collect(); + // SAFETY: `buf` is guaranteed to be valid UTF-8 because the `core::ascii::Char` type + // only contains ASCII values (0x00-0x7F), which are valid UTF-8. + unsafe { String::from_utf8_unchecked(buf) } + } +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "ascii_char", issue = "110998")] +impl<'a> FromIterator<&'a core::ascii::Char> for String { + fn from_iter>(iter: T) -> Self { + let buf = iter.into_iter().copied().map(core::ascii::Char::to_u8).collect(); + // SAFETY: `buf` is guaranteed to be valid UTF-8 because the `core::ascii::Char` type + // only contains ASCII values (0x00-0x7F), which are valid UTF-8. + unsafe { String::from_utf8_unchecked(buf) } + } +} + #[cfg(not(no_global_oom_handling))] #[stable(feature = "rust1", since = "1.0.0")] impl Extend for String { @@ -3200,6 +3222,14 @@ impl<'a> FromIterator for Cow<'a, str> { } } +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "ascii_char", issue = "110998")] +impl<'a> FromIterator for Cow<'a, str> { + fn from_iter>(it: T) -> Self { + Cow::Owned(FromIterator::from_iter(it)) + } +} + #[stable(feature = "from_string_for_vec_u8", since = "1.14.0")] impl From for Vec { /// Converts the given [`String`] to a vector [`Vec`] that holds values of type [`u8`]. 
From 6fc44eb38f06c3c0901c3647744a614ecf06a6bc Mon Sep 17 00:00:00 2001 From: "Tim (Theemathas) Chirananthavat" Date: Sat, 8 Mar 2025 16:50:19 +0700 Subject: [PATCH 004/358] Turn `Cow::is_borrowed,is_owned` into associated functions. This is done because `Cow` implements `Deref`. Therefore, to avoid conflicts with an inner type having a method of the same name, we use an associated method, like `Box::into_raw`. --- alloc/src/borrow.rs | 24 ++++++++++++++++-------- alloctests/tests/borrow.rs | 4 ++-- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/alloc/src/borrow.rs b/alloc/src/borrow.rs index 07f51b7614ff8..124b6b05f2a17 100644 --- a/alloc/src/borrow.rs +++ b/alloc/src/borrow.rs @@ -212,6 +212,10 @@ impl Clone for Cow<'_, B> { impl Cow<'_, B> { /// Returns true if the data is borrowed, i.e. if `to_mut` would require additional work. /// + /// Note: this is an associated function, which means that you have to call + /// it as `Cow::is_borrowed(&c)` instead of `c.is_borrowed()`. This is so + /// that there is no conflict with a method on the inner type. + /// /// # Examples /// /// ``` @@ -219,14 +223,14 @@ impl Cow<'_, B> { /// use std::borrow::Cow; /// /// let cow = Cow::Borrowed("moo"); - /// assert!(cow.is_borrowed()); + /// assert!(Cow::is_borrowed(&cow)); /// /// let bull: Cow<'_, str> = Cow::Owned("...moo?".to_string()); - /// assert!(!bull.is_borrowed()); + /// assert!(!Cow::is_borrowed(&bull)); /// ``` #[unstable(feature = "cow_is_borrowed", issue = "65143")] - pub const fn is_borrowed(&self) -> bool { - match *self { + pub const fn is_borrowed(c: &Self) -> bool { + match *c { Borrowed(_) => true, Owned(_) => false, } @@ -234,6 +238,10 @@ impl Cow<'_, B> { /// Returns true if the data is owned, i.e. if `to_mut` would be a no-op. /// + /// Note: this is an associated function, which means that you have to call + /// it as `Cow::is_owned(&c)` instead of `c.is_owned()`. This is so that + /// there is no conflict with a method on the inner type. + /// /// # Examples /// /// ``` @@ -241,14 +249,14 @@ impl Cow<'_, B> { /// use std::borrow::Cow; /// /// let cow: Cow<'_, str> = Cow::Owned("moo".to_string()); - /// assert!(cow.is_owned()); + /// assert!(Cow::is_owned(&cow)); /// /// let bull = Cow::Borrowed("...moo?"); - /// assert!(!bull.is_owned()); + /// assert!(!Cow::is_owned(&bull)); /// ``` #[unstable(feature = "cow_is_borrowed", issue = "65143")] - pub const fn is_owned(&self) -> bool { - !self.is_borrowed() + pub const fn is_owned(c: &Self) -> bool { + !Cow::is_borrowed(c) } /// Acquires a mutable reference to the owned form of the data. 
diff --git a/alloctests/tests/borrow.rs b/alloctests/tests/borrow.rs index af7efb7d78223..19695d424db2d 100644 --- a/alloctests/tests/borrow.rs +++ b/alloctests/tests/borrow.rs @@ -52,9 +52,9 @@ fn cow_const() { const COW: Cow<'_, str> = Cow::Borrowed("moo"); - const IS_BORROWED: bool = COW.is_borrowed(); + const IS_BORROWED: bool = Cow::is_borrowed(&COW); assert!(IS_BORROWED); - const IS_OWNED: bool = COW.is_owned(); + const IS_OWNED: bool = Cow::is_owned(&COW); assert!(!IS_OWNED); } From 814d7c41b2743cd2af43615a53c07e95b66bb48d Mon Sep 17 00:00:00 2001 From: Jeremy Smart Date: Thu, 21 Aug 2025 16:21:28 -0400 Subject: [PATCH 005/358] implement Extend<{Group, Literal, Punct, Ident}> for TokenStream --- proc_macro/src/lib.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/proc_macro/src/lib.rs b/proc_macro/src/lib.rs index 162b4fdcc8ae2..8178af15b7393 100644 --- a/proc_macro/src/lib.rs +++ b/proc_macro/src/lib.rs @@ -377,6 +377,21 @@ impl Extend for TokenStream { } } +macro_rules! extend_items { + ($($item:ident)*) => { + $( + #[stable(feature = "token_stream_extend_tt_items", since = "CURRENT_RUSTC_VERSION")] + impl Extend<$item> for TokenStream { + fn extend>(&mut self, iter: T) { + self.extend(iter.into_iter().map(|i| TokenTree::$item(i))); + } + } + )* + }; +} + +extend_items!(Group Literal Punct Ident); + /// Public implementation details for the `TokenStream` type, such as iterators. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub mod token_stream { From 29bf4a6a19790b3aa3a8d13e6cf3fc7bea7d828c Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Sat, 23 Aug 2025 09:01:38 -0400 Subject: [PATCH 006/358] stabilize unstable `rwlock_downgrade` feature Signed-off-by: Connor Tsui --- std/src/sync/nonpoison/rwlock.rs | 5 +---- std/src/sync/poison/rwlock.rs | 6 +----- std/tests/sync/lib.rs | 1 - 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/std/src/sync/nonpoison/rwlock.rs b/std/src/sync/nonpoison/rwlock.rs index eb0aef99cc1e7..0e0d7e058b077 100644 --- a/std/src/sync/nonpoison/rwlock.rs +++ b/std/src/sync/nonpoison/rwlock.rs @@ -638,7 +638,6 @@ impl<'rwlock, T: ?Sized> RwLockWriteGuard<'rwlock, T> { /// /// ``` /// #![feature(nonpoison_rwlock)] - /// #![feature(rwlock_downgrade)] /// /// use std::sync::nonpoison::{RwLock, RwLockWriteGuard}; /// @@ -657,7 +656,6 @@ impl<'rwlock, T: ?Sized> RwLockWriteGuard<'rwlock, T> { /// /// ``` /// #![feature(nonpoison_rwlock)] - /// #![feature(rwlock_downgrade)] /// /// use std::sync::Arc; /// use std::sync::nonpoison::{RwLock, RwLockWriteGuard}; @@ -690,8 +688,7 @@ impl<'rwlock, T: ?Sized> RwLockWriteGuard<'rwlock, T> { /// # let final_check = rw.read(); /// # assert_eq!(*final_check, 3); /// ``` - #[unstable(feature = "rwlock_downgrade", issue = "128203")] - // #[unstable(feature = "nonpoison_rwlock", issue = "134645")] + #[unstable(feature = "nonpoison_rwlock", issue = "134645")] pub fn downgrade(s: Self) -> RwLockReadGuard<'rwlock, T> { let lock = s.lock; diff --git a/std/src/sync/poison/rwlock.rs b/std/src/sync/poison/rwlock.rs index 0a463f3f9c7e3..1d52992ec2e0a 100644 --- a/std/src/sync/poison/rwlock.rs +++ b/std/src/sync/poison/rwlock.rs @@ -813,8 +813,6 @@ impl<'rwlock, T: ?Sized> RwLockWriteGuard<'rwlock, T> { /// `downgrade` takes ownership of the `RwLockWriteGuard` and returns a [`RwLockReadGuard`]. 
/// /// ``` - /// #![feature(rwlock_downgrade)] - /// /// use std::sync::{RwLock, RwLockWriteGuard}; /// /// let rw = RwLock::new(0); @@ -831,8 +829,6 @@ impl<'rwlock, T: ?Sized> RwLockWriteGuard<'rwlock, T> { /// thread calling `downgrade` and any reads it performs after downgrading. /// /// ``` - /// #![feature(rwlock_downgrade)] - /// /// use std::sync::{Arc, RwLock, RwLockWriteGuard}; /// /// let rw = Arc::new(RwLock::new(1)); @@ -863,7 +859,7 @@ impl<'rwlock, T: ?Sized> RwLockWriteGuard<'rwlock, T> { /// # let final_check = rw.read().unwrap(); /// # assert_eq!(*final_check, 3); /// ``` - #[unstable(feature = "rwlock_downgrade", issue = "128203")] + #[stable(feature = "rwlock_downgrade", since = "CURRENT_RUSTC_VERSION")] pub fn downgrade(s: Self) -> RwLockReadGuard<'rwlock, T> { let lock = s.lock; diff --git a/std/tests/sync/lib.rs b/std/tests/sync/lib.rs index f874c2ba38951..d44a4d6935743 100644 --- a/std/tests/sync/lib.rs +++ b/std/tests/sync/lib.rs @@ -4,7 +4,6 @@ #![feature(once_cell_try)] #![feature(lock_value_accessors)] #![feature(reentrant_lock)] -#![feature(rwlock_downgrade)] #![feature(std_internals)] #![feature(sync_nonpoison)] #![feature(nonpoison_mutex)] From 0ed2dd9261412b5c13923439ea0ab9a3110dbd90 Mon Sep 17 00:00:00 2001 From: Jeremy Smart Date: Tue, 26 Aug 2025 17:10:08 +0000 Subject: [PATCH 007/358] Update library/proc_macro/src/lib.rs Co-authored-by: David Tolnay --- proc_macro/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proc_macro/src/lib.rs b/proc_macro/src/lib.rs index 8178af15b7393..97dc482689781 100644 --- a/proc_macro/src/lib.rs +++ b/proc_macro/src/lib.rs @@ -383,7 +383,7 @@ macro_rules! extend_items { #[stable(feature = "token_stream_extend_tt_items", since = "CURRENT_RUSTC_VERSION")] impl Extend<$item> for TokenStream { fn extend>(&mut self, iter: T) { - self.extend(iter.into_iter().map(|i| TokenTree::$item(i))); + self.extend(iter.into_iter().map(TokenTree::$item)); } } )* From 0f91ae208716eb732af8b0eb3ba031691bcef4b3 Mon Sep 17 00:00:00 2001 From: Oleksandr Babak Date: Sun, 31 Aug 2025 14:34:29 +0200 Subject: [PATCH 008/358] feat: add `from_fn_ptr` to `Waker` and `LocalWaker` --- core/src/task/wake.rs | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/core/src/task/wake.rs b/core/src/task/wake.rs index bb7efe582f7a3..178717fe42eac 100644 --- a/core/src/task/wake.rs +++ b/core/src/task/wake.rs @@ -584,6 +584,28 @@ impl Waker { pub fn vtable(&self) -> &'static RawWakerVTable { self.waker.vtable } + + /// Constructs a `Waker` from a function pointer. + #[inline] + #[must_use] + #[unstable(feature = "waker_from_fn_ptr", issue = "146055")] + pub const fn from_fn_ptr(f: fn()) -> Self { + // SAFETY: Unsafe is used for transmutes, pointer came from `fn()` so it + // is sound to transmute it back to `fn()`. + static VTABLE: RawWakerVTable = unsafe { + RawWakerVTable::new( + |this| RawWaker::new(this, &VTABLE), + |this| transmute::<*const (), fn()>(this)(), + |this| transmute::<*const (), fn()>(this)(), + |_| {}, + ) + }; + let raw = RawWaker::new(f as *const (), &VTABLE); + + // SAFETY: `clone` is just a copy, `drop` is a no-op while `wake` and + // `wake_by_ref` just call the function pointer. + unsafe { Self::from_raw(raw) } + } } #[stable(feature = "futures_api", since = "1.36.0")] @@ -879,6 +901,28 @@ impl LocalWaker { pub fn vtable(&self) -> &'static RawWakerVTable { self.waker.vtable } + + /// Constructs a `LocalWaker` from a function pointer. 
+ #[inline] + #[must_use] + #[unstable(feature = "waker_from_fn_ptr", issue = "146055")] + pub const fn from_fn_ptr(f: fn()) -> Self { + // SAFETY: Unsafe is used for transmutes, pointer came from `fn()` so it + // is sound to transmute it back to `fn()`. + static VTABLE: RawWakerVTable = unsafe { + RawWakerVTable::new( + |this| RawWaker::new(this, &VTABLE), + |this| transmute::<*const (), fn()>(this)(), + |this| transmute::<*const (), fn()>(this)(), + |_| {}, + ) + }; + let raw = RawWaker::new(f as *const (), &VTABLE); + + // SAFETY: `clone` is just a copy, `drop` is a no-op while `wake` and + // `wake_by_ref` just call the function pointer. + unsafe { Self::from_raw(raw) } + } } #[unstable(feature = "local_waker", issue = "118959")] impl Clone for LocalWaker { From 2282417c7a63664d7175d048622f14d8c9bef202 Mon Sep 17 00:00:00 2001 From: ltdk Date: Wed, 3 Sep 2025 20:10:43 -0400 Subject: [PATCH 009/358] Unstably constify ptr::drop_in_place and related methods --- core/src/mem/manually_drop.rs | 7 +++++- core/src/mem/maybe_uninit.rs | 13 +++++++++-- core/src/ptr/mod.rs | 8 +++++-- core/src/ptr/mut_ptr.rs | 8 +++++-- core/src/ptr/non_null.rs | 8 +++++-- coretests/tests/lib.rs | 1 + coretests/tests/ptr.rs | 41 ++++++++++++++++++++++++++++++++++- 7 files changed, 76 insertions(+), 10 deletions(-) diff --git a/core/src/mem/manually_drop.rs b/core/src/mem/manually_drop.rs index 8868f05f1b98f..334a4b7119a11 100644 --- a/core/src/mem/manually_drop.rs +++ b/core/src/mem/manually_drop.rs @@ -1,3 +1,4 @@ +use crate::marker::Destruct; use crate::ops::{Deref, DerefMut, DerefPure}; use crate::ptr; @@ -249,7 +250,11 @@ impl ManuallyDrop { /// [pinned]: crate::pin #[stable(feature = "manually_drop", since = "1.20.0")] #[inline] - pub unsafe fn drop(slot: &mut ManuallyDrop) { + #[rustc_const_unstable(feature = "const_drop_in_place", issue = "109342")] + pub const unsafe fn drop(slot: &mut ManuallyDrop) + where + T: [const] Destruct, + { // SAFETY: we are dropping the value pointed to by a mutable reference // which is guaranteed to be valid for writes. // It is up to the caller to make sure that `slot` isn't dropped again. diff --git a/core/src/mem/maybe_uninit.rs b/core/src/mem/maybe_uninit.rs index c160360cfacf9..8d666e9b130c0 100644 --- a/core/src/mem/maybe_uninit.rs +++ b/core/src/mem/maybe_uninit.rs @@ -1,4 +1,5 @@ use crate::any::type_name; +use crate::marker::Destruct; use crate::mem::ManuallyDrop; use crate::{fmt, intrinsics, ptr, slice}; @@ -714,7 +715,11 @@ impl MaybeUninit { /// /// [`assume_init`]: MaybeUninit::assume_init #[stable(feature = "maybe_uninit_extra", since = "1.60.0")] - pub unsafe fn assume_init_drop(&mut self) { + #[rustc_const_unstable(feature = "const_drop_in_place", issue = "109342")] + pub const unsafe fn assume_init_drop(&mut self) + where + T: [const] Destruct, + { // SAFETY: the caller must guarantee that `self` is initialized and // satisfies all invariants of `T`. // Dropping the value in place is safe if that is the case. @@ -1390,7 +1395,11 @@ impl [MaybeUninit] { /// behaviour. #[unstable(feature = "maybe_uninit_slice", issue = "63569")] #[inline(always)] - pub unsafe fn assume_init_drop(&mut self) { + #[rustc_const_unstable(feature = "const_drop_in_place", issue = "109342")] + pub const unsafe fn assume_init_drop(&mut self) + where + T: [const] Destruct, + { if !self.is_empty() { // SAFETY: the caller must guarantee that every element of `self` // is initialized and satisfies all invariants of `T`. 
diff --git a/core/src/ptr/mod.rs b/core/src/ptr/mod.rs index 6b94088cb5679..8c20b5e58b8f2 100644 --- a/core/src/ptr/mod.rs +++ b/core/src/ptr/mod.rs @@ -403,7 +403,7 @@ use crate::cmp::Ordering; use crate::intrinsics::const_eval_select; -use crate::marker::{FnPtr, PointeeSized}; +use crate::marker::{Destruct, FnPtr, PointeeSized}; use crate::mem::{self, MaybeUninit, SizedTypeProperties}; use crate::num::NonZero; use crate::{fmt, hash, intrinsics, ub_checks}; @@ -801,7 +801,11 @@ pub const unsafe fn write_bytes(dst: *mut T, val: u8, count: usize) { #[lang = "drop_in_place"] #[allow(unconditional_recursion)] #[rustc_diagnostic_item = "ptr_drop_in_place"] -pub unsafe fn drop_in_place(to_drop: *mut T) { +#[rustc_const_unstable(feature = "const_drop_in_place", issue = "109342")] +pub const unsafe fn drop_in_place(to_drop: *mut T) +where + T: [const] Destruct, +{ // Code here does not matter - this is replaced by the // real drop glue by the compiler. diff --git a/core/src/ptr/mut_ptr.rs b/core/src/ptr/mut_ptr.rs index ce6eee4f911ed..1e3bc2939320d 100644 --- a/core/src/ptr/mut_ptr.rs +++ b/core/src/ptr/mut_ptr.rs @@ -1,7 +1,7 @@ use super::*; use crate::cmp::Ordering::{Equal, Greater, Less}; use crate::intrinsics::const_eval_select; -use crate::marker::PointeeSized; +use crate::marker::{Destruct, PointeeSized}; use crate::mem::{self, SizedTypeProperties}; use crate::slice::{self, SliceIndex}; @@ -1438,8 +1438,12 @@ impl *mut T { /// /// [`ptr::drop_in_place`]: crate::ptr::drop_in_place() #[stable(feature = "pointer_methods", since = "1.26.0")] + #[rustc_const_unstable(feature = "const_drop_in_place", issue = "109342")] #[inline(always)] - pub unsafe fn drop_in_place(self) { + pub const unsafe fn drop_in_place(self) + where + T: [const] Destruct, + { // SAFETY: the caller must uphold the safety contract for `drop_in_place`. unsafe { drop_in_place(self) } } diff --git a/core/src/ptr/non_null.rs b/core/src/ptr/non_null.rs index 10f83120428b9..a762e969b52dc 100644 --- a/core/src/ptr/non_null.rs +++ b/core/src/ptr/non_null.rs @@ -1,5 +1,5 @@ use crate::cmp::Ordering; -use crate::marker::{PointeeSized, Unsize}; +use crate::marker::{Destruct, PointeeSized, Unsize}; use crate::mem::{MaybeUninit, SizedTypeProperties}; use crate::num::NonZero; use crate::ops::{CoerceUnsized, DispatchFromDyn}; @@ -1118,7 +1118,11 @@ impl NonNull { /// [`ptr::drop_in_place`]: crate::ptr::drop_in_place() #[inline(always)] #[stable(feature = "non_null_convenience", since = "1.80.0")] - pub unsafe fn drop_in_place(self) { + #[rustc_const_unstable(feature = "const_drop_in_place", issue = "109342")] + pub const unsafe fn drop_in_place(self) + where + T: [const] Destruct, + { // SAFETY: the caller must uphold the safety contract for `drop_in_place`. 
unsafe { ptr::drop_in_place(self.as_ptr()) } } diff --git a/coretests/tests/lib.rs b/coretests/tests/lib.rs index b5658a9970fee..c8b940cf3728b 100644 --- a/coretests/tests/lib.rs +++ b/coretests/tests/lib.rs @@ -17,6 +17,7 @@ #![feature(clone_to_uninit)] #![feature(const_convert)] #![feature(const_destruct)] +#![feature(const_drop_in_place)] #![feature(const_eval_select)] #![feature(const_ops)] #![feature(const_option_ops)] diff --git a/coretests/tests/ptr.rs b/coretests/tests/ptr.rs index c13fb96a67f92..217e370ee8974 100644 --- a/coretests/tests/ptr.rs +++ b/coretests/tests/ptr.rs @@ -1,6 +1,6 @@ use core::cell::RefCell; use core::marker::Freeze; -use core::mem::MaybeUninit; +use core::mem::{ManuallyDrop, MaybeUninit}; use core::num::NonZero; use core::ptr; use core::ptr::*; @@ -1036,3 +1036,42 @@ fn test_ptr_default() { let default = PtrMutDefaultTest::default(); assert!(default.ptr.is_null()); } + +#[test] +fn test_const_drop_in_place() { + const COUNTER: usize = { + let mut counter = 0; + let counter_ptr = &raw mut counter; + + // only exists to make `Drop` indirect impl + #[allow(dead_code)] + struct Test(Dropped); + + struct Dropped(*mut usize); + impl const Drop for Dropped { + fn drop(&mut self) { + unsafe { + *self.0 += 1; + } + } + } + + let mut one = ManuallyDrop::new(Test(Dropped(counter_ptr))); + let mut two = ManuallyDrop::new(Test(Dropped(counter_ptr))); + let mut three = ManuallyDrop::new(Test(Dropped(counter_ptr))); + assert!(counter == 0); + unsafe { + ManuallyDrop::drop(&mut one); + } + assert!(counter == 1); + unsafe { + ManuallyDrop::drop(&mut two); + } + assert!(counter == 2); + unsafe { + ManuallyDrop::drop(&mut three); + } + counter + }; + assert_eq!(COUNTER, 3); +} From e0fa184e51c780cb8b34363b07bafc0569012949 Mon Sep 17 00:00:00 2001 From: Kivooeo Date: Sat, 6 Sep 2025 15:45:09 +0000 Subject: [PATCH 010/358] uncommented u64 impl --- core/src/num/bignum.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/num/bignum.rs b/core/src/num/bignum.rs index e33f58197bba5..1decbc2a04673 100644 --- a/core/src/num/bignum.rs +++ b/core/src/num/bignum.rs @@ -59,8 +59,7 @@ impl_full_ops! { u8: add(intrinsics::u8_add_with_overflow), mul/div(u16); u16: add(intrinsics::u16_add_with_overflow), mul/div(u32); u32: add(intrinsics::u32_add_with_overflow), mul/div(u64); - // See RFC #521 for enabling this. - // u64: add(intrinsics::u64_add_with_overflow), mul/div(u128); + u64: add(intrinsics::u64_add_with_overflow), mul/div(u128); } /// Table of powers of 5 representable in digits. Specifically, the largest {u8, u16, u32} value From 59fc39a1005ca0ba92f04fe9dfacee38e280bc39 Mon Sep 17 00:00:00 2001 From: Jeremy Smart Date: Sat, 6 Sep 2025 01:14:31 -0400 Subject: [PATCH 011/358] add SliceIndex wrapper types Last and Clamp --- core/src/index.rs | 472 +++++++++++++++++++++++++++++++++++++++ core/src/lib.rs | 1 + core/src/range.rs | 12 + core/src/slice/index.rs | 5 + coretests/tests/index.rs | 83 +++++++ coretests/tests/lib.rs | 4 + 6 files changed, 577 insertions(+) create mode 100644 core/src/index.rs create mode 100644 coretests/tests/index.rs diff --git a/core/src/index.rs b/core/src/index.rs new file mode 100644 index 0000000000000..3baefdf10cecb --- /dev/null +++ b/core/src/index.rs @@ -0,0 +1,472 @@ +#![unstable(feature = "sliceindex_wrappers", issue = "146179")] + +//! Helper types for indexing slices. 
+ +use crate::intrinsics::slice_get_unchecked; +use crate::slice::SliceIndex; +use crate::{cmp, ops, range}; + +/// Clamps an index, guaranteeing that it will only access valid elements of the slice. +/// +/// # Examples +/// +/// ``` +/// #![feature(sliceindex_wrappers)] +/// +/// use core::index::Clamp; +/// +/// let s: &[usize] = &[0, 1, 2, 3]; +/// +/// assert_eq!(&3, &s[Clamp(6)]); +/// assert_eq!(&[1, 2, 3], &s[Clamp(1..6)]); +/// assert_eq!(&[] as &[usize], &s[Clamp(5..6)]); +/// assert_eq!(&[0, 1, 2, 3], &s[Clamp(..6)]); +/// assert_eq!(&[0, 1, 2, 3], &s[Clamp(..=6)]); +/// assert_eq!(&[] as &[usize], &s[Clamp(6..)]); +/// ``` +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +#[derive(Debug)] +pub struct Clamp(pub Idx); + +/// Always accesses the last element of the slice. +/// +/// # Examples +/// +/// ``` +/// #![feature(sliceindex_wrappers)] +/// #![feature(slice_index_methods)] +/// +/// use core::index::Last; +/// use core::slice::SliceIndex; +/// +/// let s = &[0, 1, 2, 3]; +/// +/// assert_eq!(&3, &s[Last]); +/// assert_eq!(None, Last.get(&[] as &[usize])); +/// +/// ``` +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +#[derive(Debug)] +pub struct Last; + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp { + type Output = T; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + slice.get(cmp::min(self.0, slice.len() - 1)) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + slice.get_mut(cmp::min(self.0, slice.len() - 1)) + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + // SAFETY: the caller ensures that the slice isn't empty + unsafe { slice_get_unchecked(slice, cmp::min(self.0, slice.len() - 1)) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + // SAFETY: the caller ensures that the slice isn't empty + unsafe { slice_get_unchecked(slice, cmp::min(self.0, slice.len() - 1)) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + &(*slice)[cmp::min(self.0, slice.len() - 1)] + } + + fn index_mut(self, slice: &mut [T]) -> &mut Self::Output { + &mut (*slice)[cmp::min(self.0, slice.len() - 1)] + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp> { + type Output = [T]; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + (start..end).get(slice) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + (start..end).get_mut(slice) + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + // SAFETY: a range ending before len is always valid + unsafe { (start..end).get_unchecked(slice) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + // SAFETY: a range ending before len is always valid + unsafe { (start..end).get_unchecked_mut(slice) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + (start..end).index(slice) + } + + fn index_mut(self, slice: &mut [T]) -> &mut 
Self::Output { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + (start..end).index_mut(slice) + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp> { + type Output = [T]; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + (start..end).get(slice) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + (start..end).get_mut(slice) + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + // SAFETY: a range ending before len is always valid + unsafe { (start..end).get_unchecked(slice) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + // SAFETY: a range ending before len is always valid + unsafe { (start..end).get_unchecked_mut(slice) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + (start..end).index(slice) + } + + fn index_mut(self, slice: &mut [T]) -> &mut Self::Output { + let start = cmp::min(self.0.start, slice.len()); + let end = cmp::min(self.0.end, slice.len()); + (start..end).index_mut(slice) + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp> { + type Output = [T]; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.last, slice.len() - 1); + (start..=end).get(slice) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.last, slice.len() - 1); + (start..=end).get_mut(slice) + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.last, slice.len() - 1); + // SAFETY: the caller ensures that the slice isn't empty + unsafe { (start..=end).get_unchecked(slice) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.last, slice.len() - 1); + // SAFETY: the caller ensures that the slice isn't empty + unsafe { (start..=end).get_unchecked_mut(slice) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.last, slice.len() - 1); + (start..=end).index(slice) + } + + fn index_mut(self, slice: &mut [T]) -> &mut Self::Output { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.last, slice.len() - 1); + (start..=end).index_mut(slice) + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp> { + type Output = [T]; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.end, slice.len() - 1); + (start..=end).get(slice) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + let start = 
cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.end, slice.len() - 1); + (start..=end).get_mut(slice) + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.end, slice.len() - 1); + // SAFETY: the caller ensures that the slice isn't empty + unsafe { (start..=end).get_unchecked(slice) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.end, slice.len() - 1); + // SAFETY: the caller ensures that the slice isn't empty + unsafe { (start..=end).get_unchecked_mut(slice) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.end, slice.len() - 1); + (start..=end).index(slice) + } + + fn index_mut(self, slice: &mut [T]) -> &mut Self::Output { + let start = cmp::min(self.0.start, slice.len() - 1); + let end = cmp::min(self.0.end, slice.len() - 1); + (start..=end).index_mut(slice) + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp> { + type Output = [T]; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + (cmp::min(self.0.start, slice.len())..).get(slice) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + (cmp::min(self.0.start, slice.len())..).get_mut(slice) + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + // SAFETY: a range starting at len is valid + unsafe { (cmp::min(self.0.start, slice.len())..).get_unchecked(slice) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + // SAFETY: a range starting at len is valid + unsafe { (cmp::min(self.0.start, slice.len())..).get_unchecked_mut(slice) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + (cmp::min(self.0.start, slice.len())..).index(slice) + } + + fn index_mut(self, slice: &mut [T]) -> &mut Self::Output { + (cmp::min(self.0.start, slice.len())..).index_mut(slice) + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp> { + type Output = [T]; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + (cmp::min(self.0.start, slice.len())..).get(slice) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + (cmp::min(self.0.start, slice.len())..).get_mut(slice) + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + // SAFETY: a range starting at len is valid + unsafe { (cmp::min(self.0.start, slice.len())..).get_unchecked(slice) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + // SAFETY: a range starting at len is valid + unsafe { (cmp::min(self.0.start, slice.len())..).get_unchecked_mut(slice) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + (cmp::min(self.0.start, slice.len())..).index(slice) + } + + fn index_mut(self, slice: &mut [T]) -> &mut Self::Output { + (cmp::min(self.0.start, slice.len())..).index_mut(slice) + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp> { + type Output = [T]; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + (..cmp::min(self.0.end, slice.len())).get(slice) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + (..cmp::min(self.0.end, slice.len())).get_mut(slice) + } + + unsafe fn 
get_unchecked(self, slice: *const [T]) -> *const Self::Output { + // SAFETY: a range ending before len is always valid + unsafe { (..cmp::min(self.0.end, slice.len())).get_unchecked(slice) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + // SAFETY: a range ending before len is always valid + unsafe { (..cmp::min(self.0.end, slice.len())).get_unchecked_mut(slice) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + (..cmp::min(self.0.end, slice.len())).index(slice) + } + + fn index_mut(self, slice: &mut [T]) -> &mut Self::Output { + (..cmp::min(self.0.end, slice.len())).index_mut(slice) + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp> { + type Output = [T]; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + (..=cmp::min(self.0.last, slice.len() - 1)).get(slice) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + (..=cmp::min(self.0.last, slice.len() - 1)).get_mut(slice) + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + // SAFETY: the caller ensures that the slice isn't empty + unsafe { (..=cmp::min(self.0.last, slice.len() - 1)).get_unchecked(slice) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + // SAFETY: the caller ensures that the slice isn't empty + unsafe { (..=cmp::min(self.0.last, slice.len() - 1)).get_unchecked_mut(slice) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + (..=cmp::min(self.0.last, slice.len() - 1)).index(slice) + } + + fn index_mut(self, slice: &mut [T]) -> &mut Self::Output { + (..=cmp::min(self.0.last, slice.len() - 1)).index_mut(slice) + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp> { + type Output = [T]; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + (..=cmp::min(self.0.end, slice.len() - 1)).get(slice) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + (..=cmp::min(self.0.end, slice.len() - 1)).get_mut(slice) + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + // SAFETY: the caller ensures that the slice isn't empty + unsafe { (..=cmp::min(self.0.end, slice.len() - 1)).get_unchecked(slice) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + // SAFETY: the caller ensures that the slice isn't empty + unsafe { (..=cmp::min(self.0.end, slice.len() - 1)).get_unchecked_mut(slice) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + (..=cmp::min(self.0.end, slice.len() - 1)).index(slice) + } + + fn index_mut(self, slice: &mut [T]) -> &mut Self::Output { + (..=cmp::min(self.0.end, slice.len() - 1)).index_mut(slice) + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Clamp { + type Output = [T]; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + (..).get(slice) + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + (..).get_mut(slice) + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + // SAFETY: RangeFull just returns `slice` here + unsafe { (..).get_unchecked(slice) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + // SAFETY: RangeFull just returns `slice` here + unsafe { (..).get_unchecked_mut(slice) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + (..).index(slice) + } + + fn index_mut(self, slice: &mut [T]) -> &mut 
Self::Output { + (..).index_mut(slice) + } +} + +#[unstable(feature = "sliceindex_wrappers", issue = "146179")] +unsafe impl SliceIndex<[T]> for Last { + type Output = T; + + fn get(self, slice: &[T]) -> Option<&Self::Output> { + slice.last() + } + + fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> { + slice.last_mut() + } + + unsafe fn get_unchecked(self, slice: *const [T]) -> *const Self::Output { + // SAFETY: the caller ensures that the slice isn't empty + unsafe { slice_get_unchecked(slice, slice.len() - 1) } + } + + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut Self::Output { + // SAFETY: the caller ensures that the slice isn't empty + unsafe { slice_get_unchecked(slice, slice.len() - 1) } + } + + fn index(self, slice: &[T]) -> &Self::Output { + // N.B., use intrinsic indexing + &(*slice)[slice.len() - 1] + } + + fn index_mut(self, slice: &mut [T]) -> &mut Self::Output { + // N.B., use intrinsic indexing + &mut (*slice)[slice.len() - 1] + } +} diff --git a/core/src/lib.rs b/core/src/lib.rs index 86a68e18b0af4..db059b86a8c84 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -292,6 +292,7 @@ pub mod cmp; pub mod convert; pub mod default; pub mod error; +pub mod index; pub mod marker; pub mod ops; diff --git a/core/src/range.rs b/core/src/range.rs index a096a8ceafc87..ee8252c177652 100644 --- a/core/src/range.rs +++ b/core/src/range.rs @@ -629,6 +629,18 @@ impl> RangeToInclusive { } } +impl From> for RangeToInclusive { + fn from(value: legacy::RangeToInclusive) -> Self { + Self { last: value.end } + } +} + +impl From> for legacy::RangeToInclusive { + fn from(value: RangeToInclusive) -> Self { + Self { end: value.last } + } +} + // RangeToInclusive cannot impl From> // because underflow would be possible with (..0).into() diff --git a/core/src/slice/index.rs b/core/src/slice/index.rs index a8147d745f3ab..40baff3f4465e 100644 --- a/core/src/slice/index.rs +++ b/core/src/slice/index.rs @@ -134,6 +134,11 @@ mod private_slice_index { impl Sealed for range::RangeFrom {} impl Sealed for ops::IndexRange {} + + #[unstable(feature = "sliceindex_wrappers", issue = "146179")] + impl Sealed for crate::index::Last {} + #[unstable(feature = "sliceindex_wrappers", issue = "146179")] + impl Sealed for crate::index::Clamp where T: Sealed {} } /// A helper trait used for indexing operations. diff --git a/coretests/tests/index.rs b/coretests/tests/index.rs new file mode 100644 index 0000000000000..68e4c841e3226 --- /dev/null +++ b/coretests/tests/index.rs @@ -0,0 +1,83 @@ +use core::index::Clamp; +use core::range; +use core::slice::SliceIndex; + +macro_rules! 
test_clamp { + ($range:expr, $(($slice:expr, $other:expr)),+) => { + $( + assert_eq!(Clamp($range.clone()).get(&$slice as &[_]), $other.get(&$slice as &[_])); + assert_eq!(Clamp($range.clone()).get_mut(&mut $slice as &mut [_]), $other.get_mut(&mut $slice as &mut [_])); + unsafe { + assert_eq!(&*Clamp($range.clone()).get_unchecked(&$slice as &[_]), &*$other.get_unchecked(&$slice as &[_])); + assert_eq!(&*Clamp($range.clone()).get_unchecked_mut(&mut $slice as &mut [_]), &*$other.get_unchecked_mut(&mut $slice as &mut [_])); + } + assert_eq!(Clamp($range.clone()).index(&$slice as &[_]), $other.index(&$slice as &[_])); + assert_eq!(Clamp($range.clone()).index_mut(&mut $slice as &mut [_]), $other.index_mut(&mut $slice as &mut [_])); + )+ + }; +} + +#[test] +fn test_clamp_usize() { + test_clamp!(2, ([0, 1], 1), ([0, 1, 2], 2)); +} + +#[test] +fn test_clamp_range_range() { + test_clamp!(range::Range::from(1..4), ([0, 1], 1..2), ([0, 1, 2, 3, 4], 1..4), ([0], 1..1)); +} + +#[test] +fn test_clamp_ops_range() { + test_clamp!(1..4, ([0, 1], 1..2), ([0, 1, 2, 3, 4], 1..4), ([0], 1..1)); +} + +#[test] +fn test_clamp_range_range_inclusive() { + test_clamp!( + range::RangeInclusive::from(1..=3), + ([0, 1], 1..=1), + ([0, 1, 2, 3, 4], 1..=3), + ([0], 0..=0) + ); +} + +#[test] +fn test_clamp_ops_range_inclusive() { + test_clamp!(1..=3, ([0, 1], 1..=1), ([0, 1, 2, 3, 4], 1..=3), ([0], 0..=0)); +} + +#[test] +fn test_clamp_range_range_from() { + test_clamp!(range::RangeFrom::from(1..), ([0, 1], 1..), ([0, 1, 2, 3, 4], 1..), ([0], 1..)); +} + +#[test] +fn test_clamp_ops_range_from() { + test_clamp!(1.., ([0, 1], 1..), ([0, 1, 2, 3, 4], 1..), ([0], 1..)); +} + +#[test] +fn test_clamp_range_to() { + test_clamp!(..4, ([0, 1], ..2), ([0, 1, 2, 3, 4], ..4), ([0], ..1)); +} + +#[test] +fn test_clamp_range_range_to_inclusive() { + test_clamp!( + range::RangeToInclusive::from(..=4), + ([0, 1], ..=1), + ([0, 1, 2, 3, 4], ..=4), + ([0], ..=0) + ); +} + +#[test] +fn test_clamp_ops_range_to_inclusive() { + test_clamp!(..=4, ([0, 1], ..=1), ([0, 1, 2, 3, 4], ..=4), ([0], ..=0)); +} + +#[test] +fn test_clamp_range_full() { + test_clamp!(.., ([0, 1], ..), ([0, 1, 2, 3, 4], ..), ([0], ..)); +} diff --git a/coretests/tests/lib.rs b/coretests/tests/lib.rs index 5c519f3a499d2..4d0a7780fe8de 100644 --- a/coretests/tests/lib.rs +++ b/coretests/tests/lib.rs @@ -81,6 +81,7 @@ #![feature(maybe_uninit_write_slice)] #![feature(min_specialization)] #![feature(never_type)] +#![feature(new_range_api)] #![feature(next_index)] #![feature(non_exhaustive_omitted_patterns_lint)] #![feature(numfmt)] @@ -93,9 +94,11 @@ #![feature(ptr_metadata)] #![feature(result_option_map_or_default)] #![feature(slice_from_ptr_range)] +#![feature(slice_index_methods)] #![feature(slice_internals)] #![feature(slice_partition_dedup)] #![feature(slice_split_once)] +#![feature(sliceindex_wrappers)] #![feature(split_array)] #![feature(split_as_slice)] #![feature(std_internals)] @@ -173,6 +176,7 @@ mod fmt; mod future; mod hash; mod hint; +mod index; mod intrinsics; mod io; mod iter; From 59b7dec4c20b238013ffbcac49e80551fd3d9ac6 Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Tue, 22 Apr 2025 01:19:14 -0700 Subject: [PATCH 012/358] Implement `Debug` for `EncodeWide` Since `std::os::windows::ffi::EncodeWide` was reexported from `std::sys_common::wtf8::EncodeWide`, which has `#![allow(missing_debug_implementations)]` in the parent module, it did not implement `Debug`. When it was moved to `core`, a placeholder impl was added; fill it in. 
---
 alloc/src/wtf8/tests.rs   | 11 +++++++++++
 core/src/wtf8.rs          | 29 +++++++++++++++++++++++++----
 std/src/sys_common/mod.rs |  1 -
 3 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/alloc/src/wtf8/tests.rs b/alloc/src/wtf8/tests.rs
index 291f63f9f9e54..a72ad0837d11e 100644
--- a/alloc/src/wtf8/tests.rs
+++ b/alloc/src/wtf8/tests.rs
@@ -579,6 +579,17 @@ fn wtf8_encode_wide_size_hint() {
     assert!(iter.next().is_none());
 }

+#[test]
+fn wtf8_encode_wide_debug() {
+    let mut string = Wtf8Buf::from_str("aé ");
+    string.push(CodePoint::from_u32(0xD83D).unwrap());
+    string.push_char('💩');
+    assert_eq!(
+        format!("{:?}", string.encode_wide()),
+        r#"EncodeWide(['a', 'é', ' ', 0xD83D, 0xD83D, 0xDCA9])"#
+    );
+}
+
 #[test]
 fn wtf8_clone_into() {
     let mut string = Wtf8Buf::new();
diff --git a/core/src/wtf8.rs b/core/src/wtf8.rs
index de0dfa560a3f3..0c03496c5e367 100644
--- a/core/src/wtf8.rs
+++ b/core/src/wtf8.rs
@@ -562,15 +562,36 @@ impl Iterator for EncodeWide<'_> {
     }
 }

+#[stable(feature = "encode_wide_fused_iterator", since = "1.62.0")]
+impl FusedIterator for EncodeWide<'_> {}
+
+#[stable(feature = "encode_wide_debug", since = "CURRENT_RUSTC_VERSION")]
 impl fmt::Debug for EncodeWide<'_> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("EncodeWide").finish_non_exhaustive()
+        struct CodeUnit(u16);
+        impl fmt::Debug for CodeUnit {
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                // This output attempts to balance readability with precision.
+                // Render characters which take only one WTF-16 code unit using
+                // `char` syntax and everything else as code units with hex
+                // integer syntax (including paired and unpaired surrogate
+                // halves). Since Rust has no `char`-like type for WTF-16, this
+                // isn't perfect, so if this output isn't suitable, it is open
+                // to being changed (see #140153).
+                match char::from_u32(self.0 as u32) {
+                    Some(c) => write!(f, "{c:?}"),
+                    None => write!(f, "0x{:04X}", self.0),
+                }
+            }
+        }
+
+        write!(f, "EncodeWide(")?;
+        f.debug_list().entries(self.clone().map(CodeUnit)).finish()?;
+        write!(f, ")")?;
+        Ok(())
     }
 }

-#[stable(feature = "encode_wide_fused_iterator", since = "1.62.0")]
-impl FusedIterator for EncodeWide<'_> {}
-
 impl Hash for CodePoint {
     #[inline]
     fn hash<H: Hasher>(&self, state: &mut H) {
diff --git a/std/src/sys_common/mod.rs b/std/src/sys_common/mod.rs
index ec45c723e0de5..c6cb1b006e8c2 100644
--- a/std/src/sys_common/mod.rs
+++ b/std/src/sys_common/mod.rs
@@ -15,7 +15,6 @@
 //! Progress on this is tracked in #84187.

 #![allow(missing_docs)]
-#![allow(missing_debug_implementations)]

 #[cfg(test)]
 mod tests;

From 2b21ffc2e59330dd7ea07e0debab59d4a0341519 Mon Sep 17 00:00:00 2001
From: WANG Rui
Date: Thu, 18 Sep 2025 08:07:51 +0800
Subject: [PATCH 013/358] Add `is_ascii` function optimized for LoongArch64
 for [u8]

Similar to x86_64, on LoongArch64 we use the `vmskltz.b` instruction to
test the high bit in a lane. For longer inputs the performance
improvement is significant. For unaligned cases close to 32 bytes in
length there is some regression, but it seems acceptable.
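Editor's note, not part of the original commit message: on both architectures the kernel of the fast path is the same, collapsing a whole vector into a per-byte sign mask and testing it against zero. Below is a minimal sketch using the x86_64 analogue (`pmovmskb`, exposed as `_mm_movemask_epi8`); `vmskltz.b` fills the same role on LoongArch64. The function name `chunk_is_ascii` is hypothetical. The benchmark table from the original message follows the sketch.

```rust
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
fn chunk_is_ascii(chunk: &[u8; 16]) -> bool {
    use core::arch::x86_64::{_mm_loadu_si128, _mm_movemask_epi8};

    // SAFETY: `chunk` supplies 16 readable bytes, and `_mm_loadu_si128`
    // allows unaligned loads. `_mm_movemask_epi8` then gathers one bit
    // per byte, set iff that byte's high bit is set, so the chunk is
    // all-ASCII exactly when the mask is zero.
    unsafe { _mm_movemask_epi8(_mm_loadu_si128(chunk.as_ptr().cast())) == 0 }
}
```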
| core benches (MB/s) | Before | After | % | |--------------------------------------------------------|--------|--------|---------| | ascii::is_ascii::short::case00_libcore | 1000 | 1000 | 0.00 | | ascii::is_ascii::medium::case00_libcore | 8000 | 8000 | 0.00 | | ascii::is_ascii::long::case00_libcore | 183947 | 436875 | +137.50 | | ascii::is_ascii::unaligned_head_medium::case00_libcore | 7750 | 2818 | -63.64 | | ascii::is_ascii::unaligned_head_long::case00_libcore | 317681 | 436812 | +37.50 | | ascii::is_ascii::unaligned_tail_medium::case00_libcore | 7750 | 3444 | -55.56 | | ascii::is_ascii::unaligned_tail_long::case00_libcore | 155311 | 436812 | +181.25 | | ascii::is_ascii::unaligned_both_medium::case00_libcore | 7500 | 3333 | -55.56 | | ascii::is_ascii::unaligned_both_long::case00_libcore | 174700 | 436750 | +150.00 | --- core/src/slice/ascii.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/core/src/slice/ascii.rs b/core/src/slice/ascii.rs index e17a2e03d2dc4..d02be440f5bfb 100644 --- a/core/src/slice/ascii.rs +++ b/core/src/slice/ascii.rs @@ -3,7 +3,10 @@ use core::ascii::EscapeDefault; use crate::fmt::{self, Write}; -#[cfg(not(all(target_arch = "x86_64", target_feature = "sse2")))] +#[cfg(not(any( + all(target_arch = "x86_64", target_feature = "sse2"), + all(target_arch = "loongarch64", target_feature = "lsx") +)))] use crate::intrinsics::const_eval_select; use crate::{ascii, iter, ops}; @@ -357,7 +360,10 @@ pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool { /// /// If any of these loads produces something for which `contains_nonascii` /// (above) returns true, then we know the answer is false. -#[cfg(not(all(target_arch = "x86_64", target_feature = "sse2")))] +#[cfg(not(any( + all(target_arch = "x86_64", target_feature = "sse2"), + all(target_arch = "loongarch64", target_feature = "lsx") +)))] #[inline] #[rustc_allow_const_fn_unstable(const_eval_select)] // fallback impl has same behavior const fn is_ascii(s: &[u8]) -> bool { @@ -455,12 +461,15 @@ const fn is_ascii(s: &[u8]) -> bool { ) } -/// ASCII test optimized to use the `pmovmskb` instruction available on `x86-64` -/// platforms. +/// ASCII test optimized to use the `pmovmskb` instruction on `x86-64` and the +/// `vmskltz.b` instruction on `loongarch64`. /// /// Other platforms are not likely to benefit from this code structure, so they /// use SWAR techniques to test for ASCII in `usize`-sized chunks. 
-#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+#[cfg(any(
+    all(target_arch = "x86_64", target_feature = "sse2"),
+    all(target_arch = "loongarch64", target_feature = "lsx")
+))]
 #[inline]
 const fn is_ascii(bytes: &[u8]) -> bool {
     // Process chunks of 32 bytes at a time in the fast path to enable

From 28888837a32d76b6317fd7c22666b0b7ecc15133 Mon Sep 17 00:00:00 2001
From: usamoi
Date: Sat, 20 Sep 2025 17:54:17 +0800
Subject: [PATCH 014/358] intrinsic-test: test intrinsics with patched
 core_arch

---
 stdarch/crates/intrinsic-test/src/arm/config.rs      | 4 ++--
 stdarch/crates/intrinsic-test/src/common/gen_rust.rs | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/arm/config.rs b/stdarch/crates/intrinsic-test/src/arm/config.rs
index 72e997de154ab..ba5c22b22b031 100644
--- a/stdarch/crates/intrinsic-test/src/arm/config.rs
+++ b/stdarch/crates/intrinsic-test/src/arm/config.rs
@@ -125,8 +125,8 @@ pub const AARCH_CONFIGURATIONS: &str = r#"
 #![feature(stdarch_neon_f16)]

 #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-use core::arch::aarch64::*;
+use core_arch::arch::aarch64::*;
 #[cfg(target_arch = "arm")]
-use core::arch::arm::*;
+use core_arch::arch::arm::*;
 "#;

diff --git a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
index c6b964a9ce4e4..d659cbc4aaa76 100644
--- a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
@@ -37,6 +37,7 @@ pub fn write_bin_cargo_toml(
     write_cargo_toml_header(w, "intrinsic-test-programs")?;

     writeln!(w, "[dependencies]")?;
+    writeln!(w, "core_arch = {{ path = \"../crates/core_arch\" }}")?;

     for i in 0..module_count {
         writeln!(w, "mod_{i} = {{ path = \"mod_{i}/\" }}")?;

From 8cc654e488161bf2a27befa8c1f3d1c923734507 Mon Sep 17 00:00:00 2001
From: Joshua Liebow-Feeser
Date: Tue, 29 Apr 2025 07:55:37 -0700
Subject: [PATCH 015/358] Document MaybeUninit bit validity

Co-authored-by: Ralf Jung
Edited-by: TC
---
 core/src/mem/maybe_uninit.rs | 48 ++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/core/src/mem/maybe_uninit.rs b/core/src/mem/maybe_uninit.rs
index c160360cfacf9..47a34821e9018 100644
--- a/core/src/mem/maybe_uninit.rs
+++ b/core/src/mem/maybe_uninit.rs
@@ -252,6 +252,54 @@ use crate::{fmt, intrinsics, ptr, slice};
 /// std::process::exit(*code); // UB! Accessing uninitialized memory.
 /// }
 /// ```
+///
+/// # Validity
+///
+/// A `MaybeUninit<T>` has no validity requirement – any sequence of
+/// [bytes][reference-byte] of the appropriate length, initialized or
+/// uninitialized, is a valid representation of `MaybeUninit<T>`.
+///
+/// However, "round-tripping" via `MaybeUninit<U>` does not always result in the
+/// original value. `MaybeUninit<U>` can have padding, and the contents of that
+/// padding are not preserved. Concretely, given distinct `T` and `U` where
+/// `size_of::<T>() == size_of::<U>()`, the following code is not guaranteed to
+/// be sound:
+///
+/// ```rust,no_run
+/// # use core::mem::{MaybeUninit, transmute};
+/// # struct T; struct U;
+/// fn identity(t: T) -> T {
+///     unsafe {
+///         let u: MaybeUninit<U> = transmute(t);
+///         transmute(u)
+///     }
+/// }
+/// ```
+///
+/// If the representation of `t` contains initialized bytes at byte offsets
+/// where `U` contains padding bytes, these may not be preserved in
+/// `MaybeUninit<U>`. Transmuting `u` back to `T` (i.e., `transmute(u)` above)
+/// may thus be undefined behavior or yield a value different from `t` due to
+/// those bytes being lost. This is an active area of discussion, and this code
+/// may become sound in the future.
+///
+/// However, so long as no such byte offsets exist, the preceding
+/// `identity` example *is* sound. In particular, since `[u8; N]` has no padding
+/// bytes, transmuting `t` to `MaybeUninit<[u8; size_of::<T>()]>` and back will
+/// always produce the original value `t` again. This is true even if `t`
+/// contains [provenance]: the resulting value will have the same provenance as
+/// the original `t`.
+///
+/// Note a potential footgun: if `t` contains a reference, then there may be
+/// implicit reborrows of the reference any time it is copied, which may alter
+/// its provenance. In that case, the value returned by `identity` may not be
+/// exactly the same as its argument. However, even in this case, it remains
+/// true that `identity` behaves the same as a function that just returns `t`
+/// immediately (i.e., `fn identity(t: T) -> T { t }`).
+///
+/// [provenance]: crate::ptr#provenance
+///
+/// [reference-byte]: ../../reference/memory-model.html#bytes
 #[stable(feature = "maybe_uninit", since = "1.36.0")]
 // Lang item so we can wrap other types in it. This is useful for coroutines.
 #[lang = "maybe_uninit"]

From ca194765cfd4351b0bf6086c8d50145d04227df6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gabriel=20Bj=C3=B8rnager=20Jensen?=
Date: Sun, 21 Sep 2025 11:45:27 +0200
Subject: [PATCH 016/358] Stabilise '<[_]>::{rotate_left, rotate_right}';

---
 core/src/slice/mod.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/src/slice/mod.rs b/core/src/slice/mod.rs
index dfbb3628350a8..6adeb5f18663f 100644
--- a/core/src/slice/mod.rs
+++ b/core/src/slice/mod.rs
@@ -3629,7 +3629,7 @@ impl<T> [T] {
     /// assert_eq!(a, ['a', 'c', 'd', 'e', 'b', 'f']);
     /// ```
     #[stable(feature = "slice_rotate", since = "1.26.0")]
-    #[rustc_const_unstable(feature = "const_slice_rotate", issue = "143812")]
+    #[rustc_const_stable(feature = "const_slice_rotate", since = "CURRENT_RUSTC_VERSION")]
     pub const fn rotate_left(&mut self, mid: usize) {
         assert!(mid <= self.len());
         let k = self.len() - mid;
@@ -3675,7 +3675,7 @@ impl<T> [T] {
     /// assert_eq!(a, ['a', 'e', 'b', 'c', 'd', 'f']);
     /// ```
     #[stable(feature = "slice_rotate", since = "1.26.0")]
-    #[rustc_const_unstable(feature = "const_slice_rotate", issue = "143812")]
+    #[rustc_const_stable(feature = "const_slice_rotate", since = "CURRENT_RUSTC_VERSION")]
     pub const fn rotate_right(&mut self, k: usize) {
         assert!(k <= self.len());
         let mid = self.len() - k;

From e3d8472f2c6aba3dd7a9dcda6e10c5efaa4b4507 Mon Sep 17 00:00:00 2001
From: usamoi
Date: Tue, 23 Sep 2025 10:05:32 +0800
Subject: [PATCH 017/358] Revert "Remove big-endian swizzles from
 `vreinterpret`"

This reverts commit 24f89ca53d3374ed8d3e0cbadc1dc89eea41acba.
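Editor's note, not part of the original commit message: every restored function in the generated diff below follows one mechanical pattern. On big-endian targets the in-register lane order of a NEON vector is the reverse of its memory order, so a plain `transmute` between vector types with different lane counts would permute elements; the big-endian variants therefore shuffle the input into a canonical lane order before the bit-cast and shuffle the result back afterwards. Distilled from the generated code below (doc and target-feature attributes elided):

```rust
// Little-endian: lane orders already agree, so a bit-cast suffices.
#[cfg(target_endian = "little")]
pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
    unsafe { transmute(a) }
}

// Big-endian: reverse the eight f16 lanes, bit-cast, then reverse the
// two f64 lanes of the result.
#[cfg(target_endian = "big")]
pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
    unsafe {
        let ret_val: float64x2_t = transmute(a);
        simd_shuffle!(ret_val, ret_val, [1, 0])
    }
}
```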
--- .../core_arch/src/aarch64/neon/generated.rs | 991 +- .../src/arm_shared/neon/generated.rs | 10283 ++++++++++++++-- .../spec/neon/aarch64.spec.yml | 2 - .../spec/neon/arm_shared.spec.yml | 4 - 4 files changed, 10280 insertions(+), 1000 deletions(-) diff --git a/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 855261aaecfd0..554a809db8db2 100644 --- a/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -21477,73 +21477,172 @@ pub fn vrecpxh_f16(a: f16) -> f16 { #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"] +#[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(nop))] pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(nop))] pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch 
= "arm64ec"))] #[cfg_attr(test, assert_instr(nop))] pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(nop))] pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn 
vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -21551,8 +21650,23 @@ pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -21560,8 +21674,23 @@ pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -21569,8 +21698,22 @@ pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t { + unsafe { + let ret_val: float32x2_t = transmute(a); + 
simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -21578,8 +21721,22 @@ pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t { + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -21587,8 +21744,22 @@ pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t { + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -21596,6 +21767,19 @@ pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t { + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"] #[inline] #[target_feature(enable = "neon")] @@ -21607,6 +21791,7 @@ pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t { #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, 
assert_instr(nop))] @@ -21614,8 +21799,22 @@ pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t { + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -21623,8 +21822,22 @@ pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t { + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -21632,6 +21845,19 @@ pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t { + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"] #[inline] #[target_feature(enable = "neon")] @@ -21643,6 +21869,7 @@ pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t { #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -21650,364 +21877,888 @@ pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t { + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t { +pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { - unsafe { transmute(a) } +pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t { + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { +pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t { +pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn 
vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t { +pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t { +pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t { +pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + 
unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] 
+pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { + 
let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { + let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub 
fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, 
[1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t { +pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"] #[inline] 
+#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { +pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { +pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t { +pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { +pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t { - unsafe { transmute(a) } +pub 
fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t { +pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { +pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t { +pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { +pub fn vreinterpret_f64_u32(a: uint32x2_t) -> 
float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t { +pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t { +pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t { +pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { +pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { +pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { +pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { +pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"] #[inline] +#[cfg(target_endian = "big")] 
#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t { +pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t { +pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { +pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { +pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] 
-pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { +pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t { +pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t { - unsafe { transmute(a) } + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"] @@ -22039,6 +22790,7 @@ pub fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t { #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -22046,8 +22798,23 @@ pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"] #[inline] 
+#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -22055,8 +22822,23 @@ pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] @@ -22064,14 +22846,43 @@ pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t { unsafe { transmute(a) } } +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Floating-point round to 32-bit integer, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"] #[inline] diff --git a/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index e4e4e040f468d..b5ba792b18aec 100644 --- a/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -42089,6 +42089,7 @@ pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42102,8 +42103,9 @@ pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42113,12 +42115,17 @@ pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42128,12 +42135,13 @@ pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { +pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42143,12 +42151,17 @@ pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42158,12 +42171,13 @@ pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { +pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42173,12 +42187,17 @@ pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42188,12 +42207,13 @@ pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { +pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42203,12 +42223,17 @@ pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector 
reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42218,12 +42243,13 @@ pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { +pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42233,12 +42259,14 @@ pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { +pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42248,12 +42276,13 @@ pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { +pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42263,12 +42292,17 @@ pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { - unsafe { transmute(a) } +pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + 
simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42278,12 +42312,13 @@ pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { +pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42293,12 +42328,17 @@ pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { - unsafe { transmute(a) } +pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42308,12 +42348,13 @@ pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { +pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42323,12 +42364,17 @@ pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { - unsafe { 
transmute(a) } +pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42338,12 +42384,13 @@ pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { +pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42353,12 +42400,14 @@ pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { +pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42368,12 +42417,13 @@ pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { +pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42383,12 +42433,17 @@ pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] 
-pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42398,12 +42453,13 @@ pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { +pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42413,12 +42469,17 @@ pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42428,12 +42489,13 @@ pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { +pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42443,12 +42505,17 
@@ pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42458,12 +42525,13 @@ pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { +pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42473,12 +42541,21 @@ pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42488,12 +42565,13 @@ pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { +pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42503,12 +42581,17 @@ pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42518,12 +42601,13 @@ pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { +pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42533,12 +42617,17 @@ pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42548,12 +42637,13 @@ pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { +pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t 
{ unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42563,12 +42653,17 @@ pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42578,12 +42673,13 @@ pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { +pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42593,12 +42689,21 @@ pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42608,12 +42713,13 @@ pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { 
#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { +pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42623,12 +42729,17 @@ pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { - unsafe { transmute(a) } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42638,12 +42749,13 @@ pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { +pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42653,12 +42765,17 @@ pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42668,12 +42785,13 @@ pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { +pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42683,12 +42801,33 @@ pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { +pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42698,12 +42837,37 @@ pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { +pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42713,12 +42877,33 @@ pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { +pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42728,12 +42913,33 @@ pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { +pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( @@ -42743,13 +42949,34 @@ pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { +pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -42758,13 +42985,34 @@ pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { +pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -42773,13 +43021,35 @@ pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { +pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -42788,57 +43058,7447 @@ pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { +pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[inline] +#[cfg(target_endian = 
"little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] 
+#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = 
"arm64ec"))] +pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[inline] +#[cfg(target_endian 
= "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = 
"Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub 
fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { + let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { + let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { + unsafe { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { + let a: float32x2_t = unsafe { 
simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { + let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] +#[inline] 
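+// Each reinterpret cast in this file is generated twice, selected on
+// `target_endian`: the little-endian variant transmutes the bits directly,
+// while its big-endian twin first shuffles the input lanes into little-endian
+// order and, for multi-lane results, shuffles the result lanes back
+// afterwards. Illustrative shape of a big-endian body for a two-lane input,
+// as in `vreinterpret_u32_f32` above:
+//
+//     let a = unsafe { simd_shuffle!(a, a, [1, 0]) }; // to LE lane order
+//     let r: uint32x2_t = unsafe { transmute(a) };    // reinterpret the bits
+//     unsafe { simd_shuffle!(r, r, [1, 0]) }          // back to BE lane order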
+#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
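+// The `neon` target feature is enabled unconditionally; the `v7` feature in
+// the `cfg_attr` above is enabled only on 32-bit Arm.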
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
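+// Stability is split by architecture: stable since 1.59.0 everywhere except
+// 32-bit Arm, where the intrinsic is still gated behind the unstable
+// `stdarch_arm_neon_intrinsics` feature (tracking issue 111800).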
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { + let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { + 
unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
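+// `uint64x1_t` has a single lane, so the big-endian body below only reverses
+// the eight input lanes; there is no result shuffle to undo.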
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { + let a: int16x4_t = unsafe { 
simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] +#[inline] 
+#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since 
= "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector 
reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] +#[inline] +#[cfg(target_endian 
= "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since 
= "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, 
[1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} 
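// Editor's note: a minimal, hypothetical usage sketch, not part of the
// generated file. On little-endian targets every `vreinterpret*` above is a
// plain bit-level cast (`transmute`); the paired `simd_shuffle!` calls in the
// big-endian variants exist only to keep lane numbering consistent across
// endiannesses. The `demo` name is illustrative, and whether these intrinsic
// calls still require an `unsafe` block depends on the stdarch version in use.
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
fn demo() {
    use core::arch::aarch64::{vdup_n_s32, vget_lane_u8, vreinterpret_u8_s32};
    unsafe {
        // Splat 0x0403_0201 into both i32 lanes, then reinterpret as bytes.
        let a = vdup_n_s32(0x0403_0201);
        let b = vreinterpret_u8_s32(a);
        // Little-endian: the least-significant byte lands in lane 0.
        assert_eq!(vget_lane_u8::<0>(b), 0x01);
        assert_eq!(vget_lane_u8::<3>(b), 0x04);
    }
}
// Under the same assumptions, the big-endian build of `vreinterpret_u8_s32`
// would reverse the i32 lanes before the transmute and the u8 lanes after it,
// so lane indices observed by `vget_lane_u8` match the little-endian layout.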
+#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"] 
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
+    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub 
fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] +#[inline] 
+#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+    unsafe {
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+    unsafe {
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
+    unsafe { transmute(a) }
+}
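On the little-endian path each of these casts is a plain `transmute`, so calling one costs nothing at run time; the `assert_instr(nop)` attributes above test exactly that. A minimal usage sketch, assuming an AArch64 target (where NEON is always available); `lanes_demo` is a hypothetical name, not part of this patch:

```
// Hypothetical usage: view two i64 lanes as four i32 lanes; the
// reinterpret itself compiles to no instruction on AArch64.
#[cfg(target_arch = "aarch64")]
fn lanes_demo() -> core::arch::aarch64::int32x4_t {
    use core::arch::aarch64::{vdupq_n_s64, vreinterpretq_s32_s64};
    unsafe { vreinterpretq_s32_s64(vdupq_n_s64(0x1_0000_0002)) }
}
```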
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] 
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
+    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
+    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
+    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
+    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
+    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
+    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
+    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
+    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
+    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
+    let a: uint8x16_t =
+        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
+    let a: uint8x16_t =
+        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
+    let a: uint8x16_t =
+        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] +#[inline] 
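+// (Editorial note, not part of the generated source.) The attribute stack on
+// each intrinsic encodes the test and stability story: `assert_instr(nop)`
+// appears to be the crate's convention for casts that are expected to compile
+// away entirely (a `transmute` changes only the type, not the bits), and the
+// paired `stable`/`unstable` attributes record that these intrinsics are
+// stable outside 32-bit Arm (since 1.59.0) but still unstable on `arm`
+// targets (tracking issue 111800).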
+#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn 
vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { + unsafe { transmute(a) } +} +#[doc = "Vector 
reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { + unsafe { transmute(a) } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop) )] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop) )] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { - unsafe { transmute(a) } +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] #[inline] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop) )] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42854,12 +50514,21 @@ pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42875,12 +50544,13 @@ pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { +pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42896,12 +50566,17 @@ pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { + let a: uint16x8_t = unsafe { 
simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42917,12 +50592,13 @@ pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { +pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42938,12 +50614,17 @@ pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42959,12 +50640,13 @@ pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { +pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42980,12 +50662,21 @@ pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { - unsafe { 
transmute(a) } +pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43001,12 +50692,13 @@ pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { +pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43022,12 +50714,17 @@ pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43043,12 +50740,13 @@ pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { +pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43064,12 +50762,17 @@ pub fn vreinterpret_p8_f32(a: float32x2_t) -> 
poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43085,12 +50788,13 @@ pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { +pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43106,12 +50810,17 @@ pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43127,12 +50836,13 @@ pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { +pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ 
-43148,12 +50858,17 @@ pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43169,12 +50884,13 @@ pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { +pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43190,12 +50906,17 @@ pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43211,12 +50932,13 @@ pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { +pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43232,12 +50954,14 @@ pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
+pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
+    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43253,12 +50977,13 @@ pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
+pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43274,12 +50999,17 @@ pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43295,12 +51025,13 @@ pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
+pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43316,12 +51047,17 @@ pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
+    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43337,12 +51073,13 @@ pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
+pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43358,12 +51095,14 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
+pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43379,12 +51118,13 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
+pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43400,12 +51140,17 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
+    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43421,12 +51166,13 @@ pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
+pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43442,12 +51188,17 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43463,12 +51214,13 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
+pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43484,12 +51236,17 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
+    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43505,12 +51262,13 @@ pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
+pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43526,12 +51284,21 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
+    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43547,12 +51314,13 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
+pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43568,12 +51336,17 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
+    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43589,12 +51362,13 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
+pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43610,12 +51384,17 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43631,12 +51410,13 @@ pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
+pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43652,12 +51432,17 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
+    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43673,12 +51458,13 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
+pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43694,12 +51480,21 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43715,12 +51510,13 @@ pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
+pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43736,12 +51532,17 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
+    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43757,12 +51558,13 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
+pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43778,12 +51580,17 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43799,12 +51606,13 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
+pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43820,12 +51628,21 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43841,12 +51658,13 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
+pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43862,12 +51680,17 @@ pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43883,12 +51706,13 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
+pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43904,12 +51728,16 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+    unsafe {
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43925,12 +51753,13 @@ pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
+pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43946,12 +51775,16 @@ pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+    unsafe {
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43967,12 +51800,13 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
+pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43988,12 +51822,16 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+    unsafe {
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44009,12 +51847,13 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
+pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44030,11 +51869,14 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+    unsafe {
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -44051,12 +51893,13 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
+pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44072,12 +51915,13 @@ pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
+pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44093,12 +51937,16 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+    unsafe {
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44114,12 +51962,13 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
+pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44135,12 +51984,16 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
+    unsafe {
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44156,12 +52009,13 @@ pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
+pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44177,12 +52031,16 @@ pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+    unsafe {
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44198,12 +52056,13 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
+pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44219,12 +52078,16 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
+    unsafe {
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44240,12 +52103,13 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
+pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44261,12 +52125,16 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+    unsafe {
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44282,12 +52150,13 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
+pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44303,12 +52172,17 @@ pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44324,12 +52198,13 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
+pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44345,12 +52220,21 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44366,12 +52250,13 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
+pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44387,12 +52272,17 @@ pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
+    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44408,12 +52298,13 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
+pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44429,12 +52320,17 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
+    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44450,12 +52346,13 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
+pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44471,12 +52368,17 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
+    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44492,12 +52394,13 @@ pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
+pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44513,12 +52416,21 @@ pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44534,12 +52446,13 @@ pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
+pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44555,12 +52468,17 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
+    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44576,12 +52494,13 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44597,12 +52516,17 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44618,12 +52542,13 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
+pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44639,12 +52564,21 @@ pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
+    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44660,12 +52594,13 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
+pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44681,12 +52616,17 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44702,12 +52642,13 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
+pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44723,12 +52664,17 @@ pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
+    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44744,12 +52690,13 @@ pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44765,12 +52712,17 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
+    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44786,12 +52738,13 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test,
target_arch = "arm"), assert_instr(nop))] @@ -44807,12 +52760,17 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44828,12 +52786,13 @@ pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { +pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44849,12 +52808,17 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44870,12 +52834,13 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { +pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44891,12 +52856,14 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { +pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44912,12 +52879,13 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { +pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44933,12 +52901,17 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44954,12 +52927,13 @@ pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { +pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44975,12 +52949,17 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44996,12 +52975,13 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { +pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45017,12 +52997,17 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45038,12 +53023,13 @@ pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { +pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] #[inline] 
+#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45059,12 +53045,14 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { +pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45080,12 +53068,13 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { +pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45101,12 +53090,17 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45122,12 +53116,13 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { +pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] #[inline] +#[cfg(target_endian = "big")] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45143,12 +53138,18 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45164,12 +53165,13 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { +pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45185,12 +53187,22 @@ pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45206,12 +53218,13 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { +pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45227,12 +53240,18 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45248,12 +53267,13 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { +pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45269,12 +53289,18 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45290,12 +53316,13 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u8(a: uint8x16_t) 
-> int64x2_t { +pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45311,12 +53338,18 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45332,12 +53365,13 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { +pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45353,12 +53387,22 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45374,12 
+53418,13 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { +pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45395,12 +53440,18 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45416,12 +53467,13 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { +pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45437,12 +53489,18 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] #[inline] +#[cfg(target_endian = "little")] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45458,12 +53516,13 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { +pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45479,12 +53538,18 @@ pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45500,12 +53565,13 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { +pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45521,12 +53587,18 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45542,12 +53614,13 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { +pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45563,12 +53636,17 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45584,12 +53662,13 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { +pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45605,12 +53684,17 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45626,12 +53710,13 @@ pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { +pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45647,12 +53732,17 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45668,12 +53758,13 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { +pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45689,12 +53780,17 @@ pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 
0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45710,12 +53806,13 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { +pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45731,12 +53828,14 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { +pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45752,12 +53851,13 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { +pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45773,12 +53873,17 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = 
"Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45794,12 +53899,13 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { +pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45815,12 +53921,17 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45836,12 +53947,13 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { +pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45857,12 +53969,17 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: 
uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45878,12 +53995,13 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { +pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45899,12 +54017,14 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { +pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45920,12 +54040,13 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { +pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45941,12 +54062,17 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, 
ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45962,12 +54088,13 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { +pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45983,12 +54110,17 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46004,12 +54136,13 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { +pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46025,12 +54158,21 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { + let a: poly16x8_t = unsafe { 
simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46046,12 +54188,13 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
+pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46067,12 +54210,17 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
+    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46088,12 +54236,13 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
+pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46109,12 +54258,17 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
+    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46130,12 +54284,13 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
+pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46151,12 +54306,17 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
+    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46172,12 +54332,13 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
+pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46193,12 +54354,21 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
+    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46214,12 +54384,13 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46235,12 +54406,17 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    unsafe {
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46256,12 +54432,13 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
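// Every vector-to-vector hunk in this stretch applies one mechanical recipe:
// the little-endian variant keeps the plain `transmute`, while the new
// big-endian variant first reverses the input lanes into memory order and
// then reverses the result lanes back into lane order. A minimal model of
// that shape — plain arrays as hypothetical stand-ins for the NEON vector
// types and `reverse()` for `simd_shuffle!`; the names are illustrative,
// not part of the patch:
use core::mem::transmute;

type P16x8 = [u16; 8]; // stand-in for poly16x8_t
type I32x4 = [i32; 4]; // stand-in for int32x4_t

#[cfg(target_endian = "little")]
fn reinterpretq_s32_p16_model(a: P16x8) -> I32x4 {
    // Lane order equals memory order: a bit-level reinterpret suffices.
    unsafe { transmute(a) }
}

#[cfg(target_endian = "big")]
fn reinterpretq_s32_p16_model(a: P16x8) -> I32x4 {
    let mut a = a;
    a.reverse(); // models simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
    let mut ret: I32x4 = unsafe { transmute(a) };
    ret.reverse(); // models simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
    ret
}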
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46277,12 +54454,17 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46298,12 +54480,13 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { +pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46319,12 +54502,17 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46340,12 +54528,13 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { +pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46361,14 +54550,23 @@ pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46382,14 +54580,15 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { +pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46403,14 +54602,22 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46424,14 +54631,15 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { +pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46445,14 +54653,18 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46466,14 +54678,15 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { +pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46487,14 +54700,18 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { - unsafe { 
transmute(a) } +pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46508,14 +54725,15 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { +pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46529,14 +54747,18 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { + unsafe { + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46550,14 +54772,15 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { +pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46571,14 +54794,22 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46592,14 +54823,15 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { +pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46613,14 +54845,18 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] 
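// The `p128` conversions in this stretch differ in one respect: `p128` is a
// single 128-bit scalar rather than a vector, so it has no lanes to reverse
// and only the vector side of the cast is shuffled on big-endian. (Note that
// these hunks also switch the intrinsics to `neon,aes` and ARMv8, since the
// polynomial types need the `aes` extension.) A sketch of that asymmetry,
// under the same array-model assumption as above:
use core::mem::transmute;

type U8x16 = [u8; 16]; // stand-in for uint8x16_t

#[cfg(target_endian = "little")]
fn reinterpretq_u8_p128_model(a: u128) -> U8x16 {
    unsafe { transmute(a) }
}

#[cfg(target_endian = "big")]
fn reinterpretq_u8_p128_model(a: u128) -> U8x16 {
    // No input shuffle: the scalar is already in memory order.
    let mut ret: U8x16 = unsafe { transmute(a) };
    ret.reverse(); // models simd_shuffle!(ret_val, ret_val, [15, ..., 0])
    ret
}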
+#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46634,14 +54870,15 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { +pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46655,14 +54892,18 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46676,14 +54917,15 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { +pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46697,14 +54939,18 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn 
vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { + unsafe { + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46718,14 +54964,15 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { +pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46739,14 +54986,22 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { + unsafe { + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46760,14 +55015,15 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { +pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46781,14 +55037,18 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { + unsafe { + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46802,14 +55062,15 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { +pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46823,14 +55084,18 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46844,14 +55109,15 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { +pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46865,14 +55131,16 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { +pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { + let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46886,14 +55154,15 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { +pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46907,14 +55176,17 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { +pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { + let a: int8x16_t = 
+ unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46928,14 +55200,15 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { +pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46949,14 +55222,20 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { + let a: int8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46970,14 +55249,15 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { +pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -46991,14 +55271,16 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { +pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { + let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47012,14 +55294,15 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { +pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47033,14 +55316,16 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { +pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47054,14 +55339,15 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { +pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47075,14 +55361,19 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { + let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47096,14 +55387,15 @@ pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { +pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47117,14 +55409,16 @@ pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { +pub fn 
vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47138,14 +55432,15 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { +pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47159,14 +55454,16 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { +pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47180,14 +55477,15 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { +pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] #[inline] -#[target_feature(enable = "neon")] 
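// `vreinterpret_p64_*` is the degenerate case of the same recipe: the
// destination `poly64x1_t` has a single lane, so on big-endian only the
// multi-lane input is reversed and the transmuted value is returned as-is.
// Sketch, same stand-in assumptions as the models above:
use core::mem::transmute;

type I32x2 = [i32; 2]; // stand-in for int32x2_t
type P64x1 = [u64; 1]; // stand-in for poly64x1_t

#[cfg(target_endian = "little")]
fn reinterpret_p64_s32_model(a: I32x2) -> P64x1 {
    unsafe { transmute(a) }
}

#[cfg(target_endian = "big")]
fn reinterpret_p64_s32_model(a: I32x2) -> P64x1 {
    let mut a = a;
    a.reverse(); // models simd_shuffle!(a, a, [1, 0])
    unsafe { transmute(a) } // one-lane result: no output shuffle needed
}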
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47201,14 +55499,19 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47222,14 +55525,15 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { +pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47243,14 +55547,16 @@ pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { +pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { + let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), @@ -47264,14 +55570,15 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { +pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47285,14 +55592,16 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { +pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { + let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47306,14 +55615,15 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { +pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47327,14 +55637,17 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { +pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = 
"Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47348,14 +55661,15 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { +pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47369,14 +55683,20 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { + let a: uint8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47390,14 +55710,15 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { +pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] #[inline] -#[target_feature(enable = 
"neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47411,14 +55732,16 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { +pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { + let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47432,14 +55755,15 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { +pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47453,14 +55777,16 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { +pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47474,14 +55800,15 @@ pub fn 
vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { +pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -47495,12 +55822,17 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { + let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47516,12 +55848,13 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { +pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47537,12 +55870,14 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { +pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = 
"neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47558,12 +55893,13 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { +pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47579,12 +55915,14 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { +pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47600,12 +55938,13 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { +pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47621,12 +55960,17 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47642,12 +55986,13 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { +pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47663,12 +56008,14 @@ pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { +pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { + let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47684,12 +56031,13 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { +pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47705,12 +56053,14 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { +pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { + let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47726,12 +56076,13 @@ pub fn vreinterpretq_p16_p128(a: 
p128) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { +pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47747,12 +56098,15 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { +pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47768,12 +56122,13 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { +pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47789,12 +56144,18 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { + let a: poly8x16_t = + unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47810,12 +56171,13 @@ pub 
fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { +pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47831,12 +56193,14 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { +pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { + let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47852,12 +56216,13 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { +pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47873,12 +56238,14 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { +pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47894,12 +56261,13 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { +pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47915,12 +56283,17 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { + let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47936,12 +56309,13 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { +pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47957,12 +56331,16 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { + unsafe { + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47978,12 +56356,13 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { +pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47999,12 +56378,16 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { + unsafe { + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48020,12 +56403,13 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { +pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48041,12 +56425,16 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { - unsafe { transmute(a) } +pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { + unsafe { + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48062,12 +56450,13 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] 
-pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { +pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48083,12 +56472,16 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { + unsafe { + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48104,12 +56497,13 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { +pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48125,12 +56519,16 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { + unsafe { + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48146,12 +56544,13 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u64(a: 
uint64x2_t) -> p128 { +pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48167,12 +56566,16 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { + unsafe { + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48188,12 +56591,13 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { +pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48209,12 +56613,16 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { + unsafe { + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48230,12 +56638,13 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { +pub fn vreinterpret_p16_p64(a: 
poly64x1_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48251,12 +56660,16 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { - unsafe { transmute(a) } +pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { + unsafe { + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48272,12 +56685,13 @@ pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { +pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48293,12 +56707,14 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { +pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48314,12 +56730,13 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { +pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] 
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48335,12 +56752,21 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48356,12 +56782,13 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { +pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48377,12 +56804,17 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48398,12 +56830,13 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p64(a: poly64x1_t) -> 
uint32x2_t { +pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48419,12 +56852,17 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48440,12 +56878,13 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { +pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48461,12 +56900,21 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { - unsafe { transmute(a) } +pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48482,12 +56930,13 @@ pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { +pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48503,12 +56952,17 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48524,12 +56978,13 @@ pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { +pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48545,12 +57000,17 @@ pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { + let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + unsafe { + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))]
@@ -48566,12 +57026,13 @@ pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
+pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -48587,12 +57048,21 @@ pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
+    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -48608,12 +57078,13 @@ pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
+pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -48630,7 +57101,11 @@ pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+    unsafe {
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Reversing vector elements (swap endianness)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"]
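A note on the pattern in the hunks above: the little-endian variants stay plain `transmute`s, while the big-endian variants reverse the lane order with `simd_shuffle!` before and/or after the `transmute`. A reinterpret is a bytewise copy, but on a big-endian target the byte layout of each lane is reversed relative to the little-endian lane numbering the intrinsics promise, so the shuffles restore the expected lane semantics. The removal of `big_endian_inverse: false` from the spec files just below appears to be what switches the generator to emitting these paired little/big-endian variants; that reading is an inference from this diff, not something it states. A self-contained sketch of the underlying effect (illustrative only, not part of the patch):

    // Reinterpreting [u16; 4] as [u8; 8] is a bytewise copy, like a
    // `transmute` of a NEON register, so which byte lands where
    // depends on the target's byte order.
    fn main() {
        let lanes: [u16; 4] = [0x0102, 0x0304, 0x0506, 0x0708];
        let bytes: [u8; 8] = unsafe { core::mem::transmute(lanes) };
        if cfg!(target_endian = "little") {
            assert_eq!(bytes[0], 0x02); // low byte of lane 0 comes first
        } else {
            assert_eq!(bytes[0], 0x01); // high byte of lane 0 comes first
        }
        println!("{bytes:02x?}");
    }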
diff --git a/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index ccdcea980e1b2..a1a837bc61064 100644
--- a/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -8781,7 +8781,6 @@ intrinsics:
       - [float64x1_t, float32x2_t]
       - [float32x4_t, float64x2_t]
       - [float64x2_t, float32x4_t]
-    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]

@@ -8802,7 +8801,6 @@ intrinsics:
       # q
       - [float64x2_t, float16x8_t]
       - [float16x8_t, float64x2_t]
-    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]

diff --git a/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 61a3a5853632c..43dd3b9031507 100644
--- a/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -8480,7 +8480,6 @@ intrinsics:
       - [poly16x8_t, p128]
       - [int8x16_t, p128]
       - [uint8x16_t, p128]
-    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]

@@ -8718,7 +8717,6 @@ intrinsics:
       - [poly8x16_t, float32x4_t]
       - [poly16x8_t, float32x4_t]
       - [p128, float32x4_t]
-    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]

@@ -8782,7 +8780,6 @@ intrinsics:
       - [float16x8_t, uint16x8_t]
       - [float16x8_t, uint32x4_t]
       - [float16x8_t, uint64x2_t]
-    big_endian_inverse: false
     compose:
       - FnCall: [transmute, [a]]

@@ -8807,7 +8804,6 @@ intrinsics:
      - [poly128_t, float16x8_t]
      - [float16x8_t, poly128_t]
      - [float16x8_t, poly64x2_t]
-    big_endian_inverse: false
     compose:
      - FnCall: [transmute, [a]]

From 48ae68f4ec22076098d647e3483d5f491c781476 Mon Sep 17 00:00:00 2001
From: usamoi
Date: Tue, 23 Sep 2025 10:17:54 +0800
Subject: [PATCH 018/358] pick changes from
 https://github.com/rust-lang/rust/pull/146683

---
 .../core_arch/src/aarch64/neon/generated.rs    | 12 ++++++------
 stdarch/crates/core_arch/src/wasm32/mod.rs     | 16 ++++++++--------
 .../stdarch-gen-arm/spec/neon/aarch64.spec.yml | 12 ++++++------
 3 files changed, 20 insertions(+), 20 deletions(-)
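For context (an inference from the paired changes below and the linked PR, not stated in this patch): rust-lang/rust#146683 re-declares these float intrinsics as safe functions, so the thin public wrappers no longer need an `unsafe` block, and the trailing `true` argument of the matching `FnCall` spec entries, presumably the "wrap the call in unsafe" flag, is dropped. A minimal sketch of the wrapper rewrite, using a local stand-in because `crate::intrinsics::ceilf32` is unstable outside core:

    // Hypothetical stand-in for the intrinsic; the real one lowers to
    // a single instruction on most targets.
    fn ceil_intrinsic(a: f32) -> f32 {
        a.ceil()
    }

    // After the change the wrapper body is a plain call; before it,
    // the body had to read `unsafe { ceil_intrinsic(a) }`.
    pub fn f32_ceil(a: f32) -> f32 {
        ceil_intrinsic(a)
    }

    fn main() {
        assert_eq!(f32_ceil(0.1), 1.0);
    }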
"Floating-point round to integral, to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"] @@ -23581,7 +23581,7 @@ pub fn vrndpq_f64(a: float64x2_t) -> float64x2_t { #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(frintp))] pub fn vrndph_f16(a: f16) -> f16 { - unsafe { ceilf16(a) } + ceilf16(a) } #[doc = "Floating-point round to integral exact, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f16)"] @@ -25079,7 +25079,7 @@ pub fn vsqrtq_f64(a: float64x2_t) -> float64x2_t { #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(fsqrt))] pub fn vsqrth_f16(a: f16) -> f16 { - unsafe { sqrtf16(a) } + sqrtf16(a) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] diff --git a/stdarch/crates/core_arch/src/wasm32/mod.rs b/stdarch/crates/core_arch/src/wasm32/mod.rs index 60049c73295c1..01bf0a71658b8 100644 --- a/stdarch/crates/core_arch/src/wasm32/mod.rs +++ b/stdarch/crates/core_arch/src/wasm32/mod.rs @@ -43,7 +43,7 @@ pub fn unreachable() -> ! { #[must_use = "method returns a new number and does not mutate the original value"] #[unstable(feature = "wasm_numeric_instr", issue = "133908")] pub fn f32_ceil(a: f32) -> f32 { - unsafe { crate::intrinsics::ceilf32(a) } + crate::intrinsics::ceilf32(a) } /// Generates the [`f32.floor`] instruction, returning the largest integer less than or equal to `a`. @@ -57,7 +57,7 @@ pub fn f32_ceil(a: f32) -> f32 { #[must_use = "method returns a new number and does not mutate the original value"] #[unstable(feature = "wasm_numeric_instr", issue = "133908")] pub fn f32_floor(a: f32) -> f32 { - unsafe { crate::intrinsics::floorf32(a) } + crate::intrinsics::floorf32(a) } /// Generates the [`f32.trunc`] instruction, roundinging to the nearest integer towards zero. @@ -71,7 +71,7 @@ pub fn f32_floor(a: f32) -> f32 { #[must_use = "method returns a new number and does not mutate the original value"] #[unstable(feature = "wasm_numeric_instr", issue = "133908")] pub fn f32_trunc(a: f32) -> f32 { - unsafe { crate::intrinsics::truncf32(a) } + crate::intrinsics::truncf32(a) } /// Generates the [`f32.nearest`] instruction, roundinging to the nearest integer. Rounds half-way @@ -100,7 +100,7 @@ pub fn f32_nearest(a: f32) -> f32 { #[must_use = "method returns a new number and does not mutate the original value"] #[unstable(feature = "wasm_numeric_instr", issue = "133908")] pub fn f32_sqrt(a: f32) -> f32 { - unsafe { crate::intrinsics::sqrtf32(a) } + crate::intrinsics::sqrtf32(a) } /// Generates the [`f64.ceil`] instruction, returning the smallest integer greater than or equal to `a`. @@ -114,7 +114,7 @@ pub fn f32_sqrt(a: f32) -> f32 { #[must_use = "method returns a new number and does not mutate the original value"] #[unstable(feature = "wasm_numeric_instr", issue = "133908")] pub fn f64_ceil(a: f64) -> f64 { - unsafe { crate::intrinsics::ceilf64(a) } + crate::intrinsics::ceilf64(a) } /// Generates the [`f64.floor`] instruction, returning the largest integer less than or equal to `a`. 
@@ -128,7 +128,7 @@ pub fn f64_ceil(a: f64) -> f64 {
 #[must_use = "method returns a new number and does not mutate the original value"]
 #[unstable(feature = "wasm_numeric_instr", issue = "133908")]
 pub fn f64_floor(a: f64) -> f64 {
-    unsafe { crate::intrinsics::floorf64(a) }
+    crate::intrinsics::floorf64(a)
 }

 /// Generates the [`f64.trunc`] instruction, rounding to the nearest integer towards zero.
@@ -142,7 +142,7 @@ pub fn f64_floor(a: f64) -> f64 {
 #[must_use = "method returns a new number and does not mutate the original value"]
 #[unstable(feature = "wasm_numeric_instr", issue = "133908")]
 pub fn f64_trunc(a: f64) -> f64 {
-    unsafe { crate::intrinsics::truncf64(a) }
+    crate::intrinsics::truncf64(a)
 }

 /// Generates the [`f64.nearest`] instruction, rounding to the nearest integer. Rounds half-way
@@ -171,7 +171,7 @@ pub fn f64_nearest(a: f64) -> f64 {
 #[must_use = "method returns a new number and does not mutate the original value"]
 #[unstable(feature = "wasm_numeric_instr", issue = "133908")]
 pub fn f64_sqrt(a: f64) -> f64 {
-    unsafe { crate::intrinsics::sqrtf64(a) }
+    crate::intrinsics::sqrtf64(a)
 }

 unsafe extern "C-unwind" {

From 58f94828092fb686e68b519a7e2154946e9a6349 Mon Sep 17 00:00:00 2001
From: Travis Cross
Date: Sun, 21 Sep 2025 04:28:19 +0000
Subject: [PATCH 019/358] Revise `MaybeUninit` validity documentation

Let's rewrite this for better clarity. In particular, let's document
our language guarantees upfront and in positive form. We'll then list
the caveats and the non-guarantees after.
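As a concrete companion to the revised wording (an editorial sketch, not part of the patch): `Padded` below has one padding byte, so a typed copy must preserve bytes 0, 2 and 3 of the representation but may forget byte 1.

```rust
use core::mem::MaybeUninit;

#[repr(C)]
struct Padded {
    a: u8,  // byte 0
    // byte 1 is padding
    b: u16, // bytes 2..=3
}

fn main() {
    let x = MaybeUninit::new(Padded { a: 1, b: 2 });
    // A typed copy of `MaybeUninit<Padded>` preserves all non-padding
    // bytes; the contents of the padding byte are not preserved.
    let _y = x;
}
```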
---
 core/src/mem/maybe_uninit.rs | 101 +++++++++++++++++++++++------------
 1 file changed, 68 insertions(+), 33 deletions(-)

diff --git a/core/src/mem/maybe_uninit.rs b/core/src/mem/maybe_uninit.rs
index 47a34821e9018..2b074416141ce 100644
--- a/core/src/mem/maybe_uninit.rs
+++ b/core/src/mem/maybe_uninit.rs
@@ -255,51 +255,86 @@ use crate::{fmt, intrinsics, ptr, slice};
 ///
 /// # Validity
 ///
-/// A `MaybeUninit<T>` has no validity requirement – any sequence of
-/// [bytes][reference-byte] of the appropriate length, initialized or
-/// uninitialized, are a valid representation of `MaybeUninit<T>`.
-///
-/// However, "round-tripping" via `MaybeUninit<U>` does not always result in the
-/// original value. `MaybeUninit<U>` can have padding, and the contents of that
-/// padding are not preserved. Concretely, given distinct `T` and `U` where
-/// `size_of::<T>() == size_of::<U>()`, the following code is not guaranteed to
-/// be sound:
+/// `MaybeUninit<T>` has no validity requirements – any sequence of [bytes] of
+/// the appropriate length, initialized or uninitialized, are a valid
+/// representation.
+///
+/// Moving or copying a value of type `MaybeUninit<T>` (i.e., performing a
+/// "typed copy") will exactly preserve the contents, including the
+/// [provenance], of all non-padding bytes of type `T` in the value's
+/// representation.
+///
+/// Therefore `MaybeUninit` can be used to perform a round trip of a value from
+/// type `T` to type `MaybeUninit<U>` then back to type `T`, while preserving
+/// the original value, if two conditions are met. One, type `U` must have the
+/// same size as type `T`. Two, for all byte offsets where type `U` has padding,
+/// the corresponding bytes in the representation of the value must be
+/// uninitialized.
+///
+/// For example, due to the fact that the type `[u8; size_of::<T>()]` has no
+/// padding, the following is sound for any type `T` and will return the
+/// original value:
 ///
 /// ```rust,no_run
 /// # use core::mem::{MaybeUninit, transmute};
-/// # struct T; struct U;
+/// # struct T;
 /// fn identity(t: T) -> T {
 ///     unsafe {
+///         let u: MaybeUninit<[u8; size_of::<T>()]> = transmute(t);
+///         transmute(u) // OK.
+///     }
+/// }
+/// ```
+///
+/// Note: Copying a value that contains references may implicitly reborrow them
+/// causing the provenance of the returned value to differ from that of the
+/// original. This applies equally to the trivial identity function:
+///
+/// ```rust,no_run
+/// fn trivial_identity<T>(t: T) -> T { t }
+/// ```
+///
+/// Note: Moving or copying a value whose representation has initialized bytes
+/// at byte offsets where the type has padding may lose the value of those
+/// bytes, so while the original value will be preserved, the original
+/// *representation* of that value as bytes may not be. Again, this applies
+/// equally to `trivial_identity`.
+///
+/// Note: Performing this round trip when type `U` has padding at byte offsets
+/// where the representation of the original value has initialized bytes may
+/// produce undefined behavior or a different value. For example, the following
+/// is unsound since `T` requires all bytes to be initialized:
+///
+/// ```rust,no_run
+/// # use core::mem::{MaybeUninit, transmute};
+/// #[repr(C)] struct T([u8; 4]);
+/// #[repr(C)] struct U(u8, u16);
+/// fn unsound_identity(t: T) -> T {
+///     unsafe {
 ///         let u: MaybeUninit<U> = transmute(t);
-///         transmute(u)
+///         transmute(u) // UB.
 ///     }
 /// }
 /// ```
 ///
-/// If the representation of `t` contains initialized bytes at byte offsets
-/// where `U` contains padding bytes, these may not be preserved in
-/// `MaybeUninit<U>`. Transmuting `u` back to `T` (i.e., `transmute(u)` above)
-/// may thus be undefined behavior or yield a value different from `t` due to
-/// those bytes being lost. This is an active area of discussion, and this code
-/// may become sound in the future.
-///
-/// However, so long as no such byte offsets exist, then the preceding
-/// `identity` example *is* sound. In particular, since `[u8; N]` has no padding
-/// bytes, transmuting `t` to `MaybeUninit<[u8; size_of::<T>()]>` and back will
-/// always produce the original value `t` again. This is true even if `t`
-/// contains [provenance]: the resulting value will have the same provenance as
-/// the original `t`.
-///
-/// Note a potential footgun: if `t` contains a reference, then there may be
-/// implicit reborrows of the reference any time it is copied, which may alter
-/// its provenance. In that case, the value returned by `identity` may not be
-/// exactly the same as its argument. However, even in this case, it remains
-/// true that `identity` behaves the same as a function that just returns `t`
-/// immediately (i.e., `fn identity(t: T) -> T { t }`).
+/// Conversely, the following is sound since `T` allows uninitialized bytes in
+/// the representation of a value, but the round trip may alter the value:
 ///
-/// [provenance]: crate::ptr#provenance
+/// ```rust,no_run
+/// # use core::mem::{MaybeUninit, transmute};
+/// #[repr(C)] struct T(MaybeUninit<[u8; 4]>);
+/// #[repr(C)] struct U(u8, u16);
+/// fn non_identity(t: T) -> T {
+///     unsafe {
+///         // May lose an initialized byte.
+///         let u: MaybeUninit<U> = transmute(t);
+///         transmute(u)
+///     }
+/// }
+/// ```
 ///
-/// [reference-byte]: ../../reference/memory-model.html#bytes
+/// [bytes]: ../../reference/memory-model.html#bytes
+/// [provenance]: crate::ptr#provenance
 #[stable(feature = "maybe_uninit", since = "1.36.0")]
 // Lang item so we can wrap other types in it. This is useful for coroutines.
 #[lang = "maybe_uninit"]

From da5a999dc55a594953db06e4f0c68a54dd15acbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=2E=20Neusch=C3=A4fer?=
Date: Fri, 26 Sep 2025 17:32:00 +0200
Subject: [PATCH 020/358] Update memchr to 2.7.6

memchr 2.7.6 contains a bugfix for aarch64_be
---
 stdarch/Cargo.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stdarch/Cargo.lock b/stdarch/Cargo.lock
index a10a456acce1d..ff503f3035069 100644
--- a/stdarch/Cargo.lock
+++ b/stdarch/Cargo.lock
@@ -404,9 +404,9 @@ checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"

 [[package]]
 name = "memchr"
-version = "2.7.5"
+version = "2.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
+checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"

 [[package]]
 name = "once_cell_polyfill"

From 2aabf6accdacac4856fd87b473d6a922e413523c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=2E=20Neusch=C3=A4fer?=
Date: Thu, 25 Sep 2025 15:15:41 +0200
Subject: [PATCH 021/358] Update memchr to 2.7.6

memchr 2.7.6 contains a bugfix for aarch64_be
---
 Cargo.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 47fbf5169f491..dc4b7459d198c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -148,9 +148,9 @@ dependencies = [

 [[package]]
 name = "memchr"
-version = "2.7.5"
+version = "2.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
+checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
 dependencies = [
  "rustc-std-workspace-core",
 ]

From dc56fa6017a8d007534b3694dcee5103efd2ded0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20du=20Garreau?=
Date: Sun, 28 Sep 2025 15:19:19 +0200
Subject: [PATCH 022/358] Move more code to `RawVec::finish_grow`

This moves a branch and more code into the cold method `finish_grow`,
which means that less code is inlined at each `try_reserve` site.
Additionally, this reduces the amount of parameters, so they can all be
passed by registers.
---
 alloc/src/raw_vec/mod.rs | 95 +++++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 55 deletions(-)

diff --git a/alloc/src/raw_vec/mod.rs b/alloc/src/raw_vec/mod.rs
index bc9692f5b6c2f..236e33e2f450e 100644
--- a/alloc/src/raw_vec/mod.rs
+++ b/alloc/src/raw_vec/mod.rs
@@ -668,8 +668,7 @@ impl<A: Allocator> RawVecInner<A> {
     /// - `elem_layout` must be valid for `self`, i.e. it must be the same `elem_layout` used to
     ///   initially construct `self`
     /// - `elem_layout`'s size must be a multiple of its alignment
-    /// - The sum of `len` and `additional` must be greater than or equal to
-    ///   `self.capacity(elem_layout.size())`
+    /// - The sum of `len` and `additional` must be greater than the current capacity
     unsafe fn grow_amortized(
         &mut self,
         len: usize,
@@ -693,16 +692,12 @@ impl<A: Allocator> RawVecInner<A> {
         let cap = cmp::max(self.cap.as_inner() * 2, required_cap);
         let cap = cmp::max(min_non_zero_cap(elem_layout.size()), cap);

-        let new_layout = layout_array(cap, elem_layout)?;
-
         // SAFETY:
-        // - For the `current_memory` call: Precondition passed to caller
-        // - For the `finish_grow` call: Precondition passed to caller
-        //   + `current_memory` does the right thing
-        let ptr =
-            unsafe { finish_grow(new_layout, self.current_memory(elem_layout), &mut self.alloc)? };
+        // - cap >= len + additional
+        // - other preconditions passed to caller
+        let ptr = unsafe { self.finish_grow(cap, elem_layout)? };

-        // SAFETY: layout_array would have resulted in a capacity overflow if we tried to allocate more than `isize::MAX` items
+        // SAFETY: `finish_grow` would have failed if `cap > isize::MAX`
         unsafe { self.set_ptr_and_cap(ptr, cap) };
         Ok(())
     }
@@ -711,8 +706,7 @@ impl<A: Allocator> RawVecInner<A> {
     /// - `elem_layout` must be valid for `self`, i.e. it must be the same `elem_layout` used to
     ///   initially construct `self`
     /// - `elem_layout`'s size must be a multiple of its alignment
-    /// - The sum of `len` and `additional` must be greater than or equal to
-    ///   `self.capacity(elem_layout.size())`
+    /// - The sum of `len` and `additional` must be greater than the current capacity
     unsafe fn grow_exact(
         &mut self,
         len: usize,
@@ -726,21 +720,44 @@ impl<A: Allocator> RawVecInner<A> {
         }

         let cap = len.checked_add(additional).ok_or(CapacityOverflow)?;
-        let new_layout = layout_array(cap, elem_layout)?;
-
-        // SAFETY:
-        // - For the `current_memory` call: Precondition passed to caller
-        // - For the `finish_grow` call: Precondition passed to caller
-        //   + `current_memory` does the right thing
-        let ptr =
-            unsafe { finish_grow(new_layout, self.current_memory(elem_layout), &mut self.alloc)? };
-        // SAFETY: layout_array would have resulted in a capacity overflow if we tried to allocate more than `isize::MAX` items
-        unsafe {
-            self.set_ptr_and_cap(ptr, cap);
-        }
+
+        // SAFETY: preconditions passed to caller
+        let ptr = unsafe { self.finish_grow(cap, elem_layout)? };
+
+        // SAFETY: `finish_grow` would have failed if `cap > isize::MAX`
+        unsafe { self.set_ptr_and_cap(ptr, cap) };
         Ok(())
     }

+    /// # Safety
+    /// - `elem_layout` must be valid for `self`, i.e. it must be the same `elem_layout` used to
+    ///   initially construct `self`
+    /// - `elem_layout`'s size must be a multiple of its alignment
+    /// - `cap` must be greater than the current capacity
+    // not marked inline(never) since we want optimizers to be able to observe the specifics of this
+    // function, see tests/codegen-llvm/vec-reserve-extend.rs.
+    #[cold]
+    unsafe fn finish_grow(
+        &self,
+        cap: usize,
+        elem_layout: Layout,
+    ) -> Result<NonNull<[u8]>, TryReserveError> {
+        let new_layout = layout_array(cap, elem_layout)?;
+
+        let memory = if let Some((ptr, old_layout)) = unsafe { self.current_memory(elem_layout) } {
+            debug_assert_eq!(old_layout.align(), new_layout.align());
+            unsafe {
+                // The allocator checks for alignment equality
+                hint::assert_unchecked(old_layout.align() == new_layout.align());
+                self.alloc.grow(ptr, old_layout, new_layout)
+            }
+        } else {
+            self.alloc.allocate(new_layout)
+        };
+
+        memory.map_err(|_| AllocError { layout: new_layout, non_exhaustive: () }.into())
+    }
+
     /// # Safety
     /// - `elem_layout` must be valid for `self`, i.e. it must be the same `elem_layout` used to
     ///   initially construct `self`
     /// - `elem_layout`'s size must be a multiple of its alignment
@@ -820,38 +837,6 @@ impl<A: Allocator> RawVecInner<A> {
     }
 }

-/// # Safety
-/// If `current_memory` matches `Some((ptr, old_layout))`:
-/// - `ptr` must denote a block of memory *currently allocated* via `alloc`
-/// - `old_layout` must *fit* that block of memory
-/// - `new_layout` must have the same alignment as `old_layout`
-/// - `new_layout.size()` must be greater than or equal to `old_layout.size()`
-/// If `current_memory` is `None`, this function is safe.
-// not marked inline(never) since we want optimizers to be able to observe the specifics of this
-// function, see tests/codegen-llvm/vec-reserve-extend.rs.
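For readers skimming the diff, the commit's inlining claim is the usual cold-outlining pattern; a minimal, self-contained sketch (hypothetical `Buf` type, not taken from this patch):

```rust
struct Buf {
    data: Vec<u8>, // hypothetical storage
}

impl Buf {
    #[inline]
    fn reserve(&mut self, additional: usize) {
        // Hot path: only a compare-and-branch is inlined at every call
        // site; the slow path collapses to one call with few live values.
        if self.data.capacity() - self.data.len() < additional {
            self.grow_cold(additional);
        }
    }

    #[cold]
    fn grow_cold(&mut self, additional: usize) {
        // Slow path kept out-of-line so callers stay small.
        self.data.reserve(additional);
    }
}
```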
-#[cold]
-unsafe fn finish_grow<A>(
-    new_layout: Layout,
-    current_memory: Option<(NonNull<u8>, Layout)>,
-    alloc: &mut A,
-) -> Result<NonNull<[u8]>, TryReserveError>
-where
-    A: Allocator,
-{
-    let memory = if let Some((ptr, old_layout)) = current_memory {
-        debug_assert_eq!(old_layout.align(), new_layout.align());
-        unsafe {
-            // The allocator checks for alignment equality
-            hint::assert_unchecked(old_layout.align() == new_layout.align());
-            alloc.grow(ptr, old_layout, new_layout)
-        }
-    } else {
-        alloc.allocate(new_layout)
-    };
-
-    memory.map_err(|_| AllocError { layout: new_layout, non_exhaustive: () }.into())
-}
-
 // Central function for reserve error handling.
 #[cfg(not(no_global_oom_handling))]
 #[cold]

From 745f9bdba32db30d8d66597eee593216ed1aa100 Mon Sep 17 00:00:00 2001
From: Noa
Date: Wed, 27 Aug 2025 00:10:09 -0500
Subject: [PATCH 023/358] Stabilize `fmt::{from_fn, FromFn}` under feature `fmt_from_fn`

---
 alloc/src/fmt.rs         | 2 +-
 core/src/fmt/builders.rs | 9 ++++-----
 core/src/fmt/mod.rs      | 2 +-
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/alloc/src/fmt.rs b/alloc/src/fmt.rs
index 82eaf7d87244d..4d6fe220a09ad 100644
--- a/alloc/src/fmt.rs
+++ b/alloc/src/fmt.rs
@@ -602,7 +602,7 @@ pub use core::fmt::{DebugAsHex, FormattingOptions, Sign};
 pub use core::fmt::{DebugList, DebugMap, DebugSet, DebugStruct, DebugTuple};
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use core::fmt::{Formatter, Result, Write};
-#[unstable(feature = "debug_closure_helpers", issue = "117729")]
+#[stable(feature = "fmt_from_fn", since = "CURRENT_RUSTC_VERSION")]
 pub use core::fmt::{FromFn, from_fn};
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use core::fmt::{LowerExp, UpperExp};
diff --git a/core/src/fmt/builders.rs b/core/src/fmt/builders.rs
index 665b05b12ec07..dba54353e03f2 100644
--- a/core/src/fmt/builders.rs
+++ b/core/src/fmt/builders.rs
@@ -1216,7 +1216,6 @@ impl<'a, 'b: 'a> DebugMap<'a, 'b> {
 /// # Examples
 ///
 /// ```
-/// #![feature(debug_closure_helpers)]
 /// use std::fmt;
 ///
 /// let value = 'a';
@@ -1227,7 +1226,7 @@ impl<'a, 'b: 'a> DebugMap<'a, 'b> {
 /// assert_eq!(format!("{}", wrapped), "'a'");
 /// assert_eq!(format!("{:?}", wrapped), "'a'");
 /// ```
-#[unstable(feature = "debug_closure_helpers", issue = "117729")]
+#[stable(feature = "fmt_from_fn", since = "CURRENT_RUSTC_VERSION")]
 #[must_use = "returns a type implementing Debug and Display, which do not have any effects unless they are used"]
 pub fn from_fn<F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result>(f: F) -> FromFn<F> {
     FromFn(f)
 }
@@ -1236,12 +1235,12 @@ pub fn from_fn<F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result>(f: F) -> FromFn
 /// Implements [`fmt::Debug`] and [`fmt::Display`] using a function.
 ///
 /// Created with [`from_fn`].
-#[unstable(feature = "debug_closure_helpers", issue = "117729")]
+#[stable(feature = "fmt_from_fn", since = "CURRENT_RUSTC_VERSION")]
 pub struct FromFn<F>(F)
 where
     F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result;

-#[unstable(feature = "debug_closure_helpers", issue = "117729")]
+#[stable(feature = "fmt_from_fn", since = "CURRENT_RUSTC_VERSION")]
 impl<F> fmt::Debug for FromFn<F>
 where
     F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
@@ -1251,7 +1250,7 @@ where
     }
 }

-#[unstable(feature = "debug_closure_helpers", issue = "117729")]
+#[stable(feature = "fmt_from_fn", since = "CURRENT_RUSTC_VERSION")]
 impl<F> fmt::Display for FromFn<F>
 where
     F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
diff --git a/core/src/fmt/mod.rs b/core/src/fmt/mod.rs
index fcd2e52101ff0..1d592ab6800d0 100644
--- a/core/src/fmt/mod.rs
+++ b/core/src/fmt/mod.rs
@@ -39,7 +39,7 @@ pub use num_buffer::{NumBuffer, NumBufferTrait};

 #[stable(feature = "debug_builders", since = "1.2.0")]
 pub use self::builders::{DebugList, DebugMap, DebugSet, DebugStruct, DebugTuple};
-#[unstable(feature = "debug_closure_helpers", issue = "117729")]
+#[stable(feature = "fmt_from_fn", since = "CURRENT_RUSTC_VERSION")]
 pub use self::builders::{FromFn, from_fn};

 /// The type returned by formatter methods.

From c11e116bff8f05a239e0eb6d656276a3106e6c8f Mon Sep 17 00:00:00 2001
From: Noa
Date: Wed, 27 Aug 2025 00:11:46 -0500
Subject: [PATCH 024/358] Reword docs slightly

---
 core/src/fmt/builders.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/core/src/fmt/builders.rs b/core/src/fmt/builders.rs
index dba54353e03f2..e97c1a8f77e45 100644
--- a/core/src/fmt/builders.rs
+++ b/core/src/fmt/builders.rs
@@ -1210,8 +1210,8 @@ impl<'a, 'b: 'a> DebugMap<'a, 'b> {
     }
 }

-/// Creates a type whose [`fmt::Debug`] and [`fmt::Display`] impls are provided with the function
-/// `f`.
+/// Creates a type whose [`fmt::Debug`] and [`fmt::Display`] impls are
+/// forwarded to the provided closure.
 ///
 /// # Examples
 ///
 /// ```
@@ -1232,7 +1232,7 @@ pub fn from_fn<F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result>(f: F) -> FromFn
     FromFn(f)
 }

-/// Implements [`fmt::Debug`] and [`fmt::Display`] using a function.
+/// Implements [`fmt::Debug`] and [`fmt::Display`] via the provided closure.
 ///
 /// Created with [`from_fn`].

From 846108926b6e7044d928dd7fdabf0419e7e550a8 Mon Sep 17 00:00:00 2001
From: Noa
Date: Fri, 19 Sep 2025 13:27:11 -0500
Subject: [PATCH 025/358] Remove F: Fn bound from FromFn struct

---
 core/src/fmt/builders.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/core/src/fmt/builders.rs b/core/src/fmt/builders.rs
index e97c1a8f77e45..4ea6c6ba8fb9c 100644
--- a/core/src/fmt/builders.rs
+++ b/core/src/fmt/builders.rs
@@ -1236,9 +1236,7 @@ pub fn from_fn<F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result>(f: F) -> FromFn
 ///
 /// Created with [`from_fn`].
 #[stable(feature = "fmt_from_fn", since = "CURRENT_RUSTC_VERSION")]
-pub struct FromFn<F>(F)
-where
-    F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result;
+pub struct FromFn<F>(F);

 #[stable(feature = "fmt_from_fn", since = "CURRENT_RUSTC_VERSION")]
 impl<F> fmt::Debug for FromFn<F>

From 539c1c5c678374f233550affda30b2932ffe9b41 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Wed, 1 Oct 2025 07:16:44 +0530
Subject: [PATCH 026/358] Remove uses of deprecated `llvm.x86.addcarryx.u{32,64}` intrinsics

- Correct mistake in x86_64/adx.rs where it was not testing `_addcarryx` at all
---
 stdarch/crates/core_arch/src/x86/adx.rs    |  4 +---
 stdarch/crates/core_arch/src/x86_64/adx.rs | 16 +++++++---------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/stdarch/crates/core_arch/src/x86/adx.rs b/stdarch/crates/core_arch/src/x86/adx.rs
index 5ba766461653b..9ce65b76431a4 100644
--- a/stdarch/crates/core_arch/src/x86/adx.rs
+++ b/stdarch/crates/core_arch/src/x86/adx.rs
@@ -5,8 +5,6 @@ use stdarch_test::assert_instr;
 unsafe extern "unadjusted" {
     #[link_name = "llvm.x86.addcarry.32"]
     fn llvm_addcarry_u32(a: u8, b: u32, c: u32) -> (u8, u32);
-    #[link_name = "llvm.x86.addcarryx.u32"]
-    fn llvm_addcarryx_u32(a: u8, b: u32, c: u32, d: *mut u32) -> u8;
     #[link_name = "llvm.x86.subborrow.32"]
     fn llvm_subborrow_u32(a: u8, b: u32, c: u32) -> (u8, u32);
 }
@@ -35,7 +33,7 @@ pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 {
 #[cfg_attr(test, assert_instr(adc))]
 #[stable(feature = "simd_x86_adx", since = "1.33.0")]
 pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 {
-    llvm_addcarryx_u32(c_in, a, b, out as *mut _)
+    _addcarry_u32(c_in, a, b, out)
 }

 /// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`
diff --git a/stdarch/crates/core_arch/src/x86_64/adx.rs b/stdarch/crates/core_arch/src/x86_64/adx.rs
index bdc534b5a525b..cf378cc169c34 100644
--- a/stdarch/crates/core_arch/src/x86_64/adx.rs
+++ b/stdarch/crates/core_arch/src/x86_64/adx.rs
@@ -5,8 +5,6 @@ use stdarch_test::assert_instr;
 unsafe extern "unadjusted" {
     #[link_name = "llvm.x86.addcarry.64"]
     fn llvm_addcarry_u64(a: u8, b: u64, c: u64) -> (u8, u64);
-    #[link_name = "llvm.x86.addcarryx.u64"]
-    fn llvm_addcarryx_u64(a: u8, b: u64, c: u64, d: *mut u64) -> u8;
     #[link_name = "llvm.x86.subborrow.64"]
     fn llvm_subborrow_u64(a: u8, b: u64, c: u64) -> (u8, u64);
 }
@@ -35,7 +33,7 @@ pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 {
 #[cfg_attr(test, assert_instr(adc))]
 #[stable(feature = "simd_x86_adx", since = "1.33.0")]
 pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 {
-    llvm_addcarryx_u64(c_in, a, b, out as *mut _)
+    _addcarry_u64(c_in, a, b, out)
 }

 /// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`.
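For reference, the intended use of these carry intrinsics is limb-wise wide addition; a minimal sketch (the `add_u128` helper is illustrative, not from the patch):

```rust
#[cfg(target_arch = "x86_64")]
unsafe fn add_u128(a: [u64; 2], b: [u64; 2]) -> ([u64; 2], u8) {
    use core::arch::x86_64::_addcarry_u64;
    let (mut lo, mut hi) = (0, 0);
    // The carry out of the low limb feeds the high limb.
    let c = _addcarry_u64(0, a[0], b[0], &mut lo);
    let c = _addcarry_u64(c, a[1], b[1], &mut hi);
    ([lo, hi], c)
}
```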
@@ -95,27 +93,27 @@ mod tests { let a = u64::MAX; let mut out = 0; - let r = _addcarry_u64(0, a, 1, &mut out); + let r = _addcarryx_u64(0, a, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, 0); - let r = _addcarry_u64(0, a, 0, &mut out); + let r = _addcarryx_u64(0, a, 0, &mut out); assert_eq!(r, 0); assert_eq!(out, a); - let r = _addcarry_u64(1, a, 1, &mut out); + let r = _addcarryx_u64(1, a, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, 1); - let r = _addcarry_u64(1, a, 0, &mut out); + let r = _addcarryx_u64(1, a, 0, &mut out); assert_eq!(r, 1); assert_eq!(out, 0); - let r = _addcarry_u64(0, 3, 4, &mut out); + let r = _addcarryx_u64(0, 3, 4, &mut out); assert_eq!(r, 0); assert_eq!(out, 7); - let r = _addcarry_u64(1, 3, 4, &mut out); + let r = _addcarryx_u64(1, 3, 4, &mut out); assert_eq!(r, 0); assert_eq!(out, 8); } From 898621a30749dd97b663319fe928a2534755f143 Mon Sep 17 00:00:00 2001 From: sayantn Date: Wed, 1 Oct 2025 07:20:30 +0530 Subject: [PATCH 027/358] Use SIMD intrinsics for `madd`, `hadd` and `hsub` intrinsics --- stdarch/crates/core_arch/src/x86/avx.rs | 32 ++++++---- stdarch/crates/core_arch/src/x86/avx2.rs | 65 +++++++++++++++----- stdarch/crates/core_arch/src/x86/avx512bw.rs | 17 ++++- stdarch/crates/core_arch/src/x86/sse2.rs | 9 ++- stdarch/crates/core_arch/src/x86/sse3.rs | 32 ++++++---- stdarch/crates/core_arch/src/x86/ssse3.rs | 44 ++++++++----- 6 files changed, 138 insertions(+), 61 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx.rs b/stdarch/crates/core_arch/src/x86/avx.rs index 24e0cf6ba1afb..34d3ff394bbc7 100644 --- a/stdarch/crates/core_arch/src/x86/avx.rs +++ b/stdarch/crates/core_arch/src/x86/avx.rs @@ -587,7 +587,11 @@ pub fn _mm256_dp_ps(a: __m256, b: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vhaddpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d { - unsafe { vhaddpd(a, b) } + unsafe { + let even = simd_shuffle!(a, b, [0, 4, 2, 6]); + let odd = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_add(even, odd) + } } /// Horizontal addition of adjacent pairs in the two packed vectors @@ -602,7 +606,11 @@ pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vhaddps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 { - unsafe { vhaddps(a, b) } + unsafe { + let even = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]); + let odd = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]); + simd_add(even, odd) + } } /// Horizontal subtraction of adjacent pairs in the two packed vectors @@ -616,7 +624,11 @@ pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vhsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d { - unsafe { vhsubpd(a, b) } + unsafe { + let even = simd_shuffle!(a, b, [0, 4, 2, 6]); + let odd = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_sub(even, odd) + } } /// Horizontal subtraction of adjacent pairs in the two packed vectors @@ -631,7 +643,11 @@ pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vhsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { - unsafe { vhsubps(a, b) } + unsafe { + let even = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]); + let odd = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]); + simd_sub(even, odd) + } } /// Computes the bitwise XOR of packed double-precision (64-bit) 
floating-point @@ -3044,14 +3060,6 @@ unsafe extern "C" { fn roundps256(a: __m256, b: i32) -> __m256; #[link_name = "llvm.x86.avx.dp.ps.256"] fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256; - #[link_name = "llvm.x86.avx.hadd.pd.256"] - fn vhaddpd(a: __m256d, b: __m256d) -> __m256d; - #[link_name = "llvm.x86.avx.hadd.ps.256"] - fn vhaddps(a: __m256, b: __m256) -> __m256; - #[link_name = "llvm.x86.avx.hsub.pd.256"] - fn vhsubpd(a: __m256d, b: __m256d) -> __m256d; - #[link_name = "llvm.x86.avx.hsub.ps.256"] - fn vhsubps(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.sse2.cmp.pd"] fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; #[link_name = "llvm.x86.avx.cmp.pd.256"] diff --git a/stdarch/crates/core_arch/src/x86/avx2.rs b/stdarch/crates/core_arch/src/x86/avx2.rs index 739de2b341260..1597e5af07332 100644 --- a/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/stdarch/crates/core_arch/src/x86/avx2.rs @@ -891,7 +891,21 @@ pub fn _mm256_extracti128_si256(a: __m256i) -> __m128i { #[cfg_attr(test, assert_instr(vphaddw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(phaddw(a.as_i16x16(), b.as_i16x16())) } + let a = a.as_i16x16(); + let b = b.as_i16x16(); + unsafe { + let even: i16x16 = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30] + ); + let odd: i16x16 = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31] + ); + simd_add(even, odd).as_m256i() + } } /// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`. @@ -902,7 +916,13 @@ pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vphaddd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(phaddd(a.as_i32x8(), b.as_i32x8())) } + let a = a.as_i32x8(); + let b = b.as_i32x8(); + unsafe { + let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]); + let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]); + simd_add(even, odd).as_m256i() + } } /// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b` @@ -925,7 +945,21 @@ pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vphsubw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(phsubw(a.as_i16x16(), b.as_i16x16())) } + let a = a.as_i16x16(); + let b = b.as_i16x16(); + unsafe { + let even: i16x16 = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30] + ); + let odd: i16x16 = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31] + ); + simd_sub(even, odd).as_m256i() + } } /// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`. 
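To make the shuffle indices above easier to audit, here is an illustrative scalar model of the 128-bit horizontal add (the 256-bit forms apply the same rule within each 128-bit half, which is why their index arrays look interleaved):

```rust
// Result lane i is the sum of an adjacent input pair; `a` feeds the low
// half of the result and `b` the high half.
fn hadd_i32x4(a: [i32; 4], b: [i32; 4]) -> [i32; 4] {
    [a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]]
}
```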
@@ -936,7 +970,13 @@ pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vphsubd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(phsubd(a.as_i32x8(), b.as_i32x8())) } + let a = a.as_i32x8(); + let b = b.as_i32x8(); + unsafe { + let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]); + let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]); + simd_sub(even, odd).as_m256i() + } } /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b` @@ -1714,7 +1754,12 @@ pub fn _mm256_inserti128_si256(a: __m256i, b: __m128i) -> __m25 #[cfg_attr(test, assert_instr(vpmaddwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) } + unsafe { + let r: i32x16 = simd_mul(simd_cast(a.as_i16x16()), simd_cast(b.as_i16x16())); + let even: i32x8 = simd_shuffle!(r, r, [0, 2, 4, 6, 8, 10, 12, 14]); + let odd: i32x8 = simd_shuffle!(r, r, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_add(even, odd).as_m256i() + } } /// Vertically multiplies each unsigned 8-bit integer from `a` with the @@ -3594,20 +3639,10 @@ pub fn _mm256_extract_epi16(a: __m256i) -> i32 { #[allow(improper_ctypes)] unsafe extern "C" { - #[link_name = "llvm.x86.avx2.phadd.w"] - fn phaddw(a: i16x16, b: i16x16) -> i16x16; - #[link_name = "llvm.x86.avx2.phadd.d"] - fn phaddd(a: i32x8, b: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.phadd.sw"] fn phaddsw(a: i16x16, b: i16x16) -> i16x16; - #[link_name = "llvm.x86.avx2.phsub.w"] - fn phsubw(a: i16x16, b: i16x16) -> i16x16; - #[link_name = "llvm.x86.avx2.phsub.d"] - fn phsubd(a: i32x8, b: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.phsub.sw"] fn phsubsw(a: i16x16, b: i16x16) -> i16x16; - #[link_name = "llvm.x86.avx2.pmadd.wd"] - fn pmaddwd(a: i16x16, b: i16x16) -> i32x8; #[link_name = "llvm.x86.avx2.pmadd.ub.sw"] fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16; #[link_name = "llvm.x86.avx2.maskload.d"] diff --git a/stdarch/crates/core_arch/src/x86/avx512bw.rs b/stdarch/crates/core_arch/src/x86/avx512bw.rs index 8139b8cd6f3cf..ad48cfd686c6d 100644 --- a/stdarch/crates/core_arch/src/x86/avx512bw.rs +++ b/stdarch/crates/core_arch/src/x86/avx512bw.rs @@ -5835,7 +5835,20 @@ pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpmaddwd))] pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) } + unsafe { + let r: i32x32 = simd_mul(simd_cast(a.as_i16x32()), simd_cast(b.as_i16x32())); + let even: i32x16 = simd_shuffle!( + r, + r, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let odd: i32x16 = simd_shuffle!( + r, + r, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + simd_add(even, odd).as_m512i() + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
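The `madd` rewrite follows the same widen-multiply-then-pair pattern; a scalar sketch of the 128-bit case for reference (the sum of two `i16` products always fits in an `i32`, so no saturation is needed):

```rust
fn madd_epi16(a: [i16; 8], b: [i16; 8]) -> [i32; 4] {
    let mut r = [0i32; 4];
    for i in 0..4 {
        // Widen each 16-bit operand to 32 bits, multiply, then add the
        // adjacent (even, odd) pair of products.
        r[i] = a[2 * i] as i32 * b[2 * i] as i32
            + a[2 * i + 1] as i32 * b[2 * i + 1] as i32;
    }
    r
}
```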
@@ -11617,8 +11630,6 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"] fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32; - #[link_name = "llvm.x86.avx512.pmaddw.d.512"] - fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16; #[link_name = "llvm.x86.avx512.pmaddubs.w.512"] fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32; diff --git a/stdarch/crates/core_arch/src/x86/sse2.rs b/stdarch/crates/core_arch/src/x86/sse2.rs index 1eaa89663b2ca..76945cfbbf229 100644 --- a/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/stdarch/crates/core_arch/src/x86/sse2.rs @@ -201,7 +201,12 @@ pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pmaddwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) } + unsafe { + let r: i32x8 = simd_mul(simd_cast(a.as_i16x8()), simd_cast(b.as_i16x8())); + let even: i32x4 = simd_shuffle!(r, r, [0, 2, 4, 6]); + let odd: i32x4 = simd_shuffle!(r, r, [1, 3, 5, 7]); + simd_add(even, odd).as_m128i() + } } /// Compares packed 16-bit integers in `a` and `b`, and returns the packed @@ -3043,8 +3048,6 @@ unsafe extern "C" { fn lfence(); #[link_name = "llvm.x86.sse2.mfence"] fn mfence(); - #[link_name = "llvm.x86.sse2.pmadd.wd"] - fn pmaddwd(a: i16x8, b: i16x8) -> i32x4; #[link_name = "llvm.x86.sse2.psad.bw"] fn psadbw(a: u8x16, b: u8x16) -> u64x2; #[link_name = "llvm.x86.sse2.psll.w"] diff --git a/stdarch/crates/core_arch/src/x86/sse3.rs b/stdarch/crates/core_arch/src/x86/sse3.rs index 7a32cfe472d43..79be7a7e9b2ce 100644 --- a/stdarch/crates/core_arch/src/x86/sse3.rs +++ b/stdarch/crates/core_arch/src/x86/sse3.rs @@ -51,7 +51,11 @@ pub fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(haddpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d { - unsafe { haddpd(a, b) } + unsafe { + let even = simd_shuffle!(a, b, [0, 2]); + let odd = simd_shuffle!(a, b, [1, 3]); + simd_add(even, odd) + } } /// Horizontally adds adjacent pairs of single-precision (32-bit) @@ -63,7 +67,11 @@ pub fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(haddps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 { - unsafe { haddps(a, b) } + unsafe { + let even = simd_shuffle!(a, b, [0, 2, 4, 6]); + let odd = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_add(even, odd) + } } /// Horizontally subtract adjacent pairs of double-precision (64-bit) @@ -75,7 +83,11 @@ pub fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(hsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d { - unsafe { hsubpd(a, b) } + unsafe { + let even = simd_shuffle!(a, b, [0, 2]); + let odd = simd_shuffle!(a, b, [1, 3]); + simd_sub(even, odd) + } } /// Horizontally adds adjacent pairs of single-precision (32-bit) @@ -87,7 +99,11 @@ pub fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(hsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 { - unsafe { hsubps(a, b) } + unsafe { + let even = simd_shuffle!(a, b, [0, 2, 4, 6]); + let odd = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_sub(even, odd) + } } /// Loads 128-bits of integer data from unaligned memory. 
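For the two-lane `f64` case the even/odd selection degenerates to one adjacent pair per input; an illustrative scalar model:

```rust
fn hadd_pd(a: [f64; 2], b: [f64; 2]) -> [f64; 2] {
    [a[0] + a[1], b[0] + b[1]]
}
```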
@@ -153,14 +169,6 @@ pub fn _mm_moveldup_ps(a: __m128) -> __m128 { #[allow(improper_ctypes)] unsafe extern "C" { - #[link_name = "llvm.x86.sse3.hadd.pd"] - fn haddpd(a: __m128d, b: __m128d) -> __m128d; - #[link_name = "llvm.x86.sse3.hadd.ps"] - fn haddps(a: __m128, b: __m128) -> __m128; - #[link_name = "llvm.x86.sse3.hsub.pd"] - fn hsubpd(a: __m128d, b: __m128d) -> __m128d; - #[link_name = "llvm.x86.sse3.hsub.ps"] - fn hsubps(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse3.ldu.dq"] fn lddqu(mem_addr: *const i8) -> i8x16; } diff --git a/stdarch/crates/core_arch/src/x86/ssse3.rs b/stdarch/crates/core_arch/src/x86/ssse3.rs index 2be182e88f4ba..ac067bd4b5a7e 100644 --- a/stdarch/crates/core_arch/src/x86/ssse3.rs +++ b/stdarch/crates/core_arch/src/x86/ssse3.rs @@ -164,7 +164,13 @@ pub fn _mm_alignr_epi8(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(phaddw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) } + let a = a.as_i16x8(); + let b = b.as_i16x8(); + unsafe { + let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_add(even, odd).as_m128i() + } } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -189,7 +195,13 @@ pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(phaddd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(phaddd128(a.as_i32x4(), b.as_i32x4())) } + let a = a.as_i32x4(); + let b = b.as_i32x4(); + unsafe { + let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]); + let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_add(even, odd).as_m128i() + } } /// Horizontally subtract the adjacent pairs of values contained in 2 @@ -201,7 +213,13 @@ pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(phsubw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(phsubw128(a.as_i16x8(), b.as_i16x8())) } + let a = a.as_i16x8(); + let b = b.as_i16x8(); + unsafe { + let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_sub(even, odd).as_m128i() + } } /// Horizontally subtract the adjacent pairs of values contained in 2 @@ -227,7 +245,13 @@ pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(phsubd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) } + let a = a.as_i32x4(); + let b = b.as_i32x4(); + unsafe { + let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]); + let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_sub(even, odd).as_m128i() + } } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -305,24 +329,12 @@ unsafe extern "C" { #[link_name = "llvm.x86.ssse3.pshuf.b.128"] fn pshufb128(a: u8x16, b: u8x16) -> u8x16; - #[link_name = "llvm.x86.ssse3.phadd.w.128"] - fn phaddw128(a: i16x8, b: i16x8) -> i16x8; - #[link_name = "llvm.x86.ssse3.phadd.sw.128"] fn phaddsw128(a: i16x8, b: i16x8) -> i16x8; - #[link_name = "llvm.x86.ssse3.phadd.d.128"] - fn phaddd128(a: i32x4, b: i32x4) -> i32x4; - - #[link_name = 
"llvm.x86.ssse3.phsub.w.128"] - fn phsubw128(a: i16x8, b: i16x8) -> i16x8; - #[link_name = "llvm.x86.ssse3.phsub.sw.128"] fn phsubsw128(a: i16x8, b: i16x8) -> i16x8; - #[link_name = "llvm.x86.ssse3.phsub.d.128"] - fn phsubd128(a: i32x4, b: i32x4) -> i32x4; - #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"] fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8; From 3d7234fa5bb4d955d7f9660676ac46354bee5b1a Mon Sep 17 00:00:00 2001 From: sayantn Date: Wed, 1 Oct 2025 07:22:12 +0530 Subject: [PATCH 028/358] Use SIMD intrinsics for shift and rotate intrinsics --- stdarch/crates/core_arch/src/x86/avx2.rs | 40 +- stdarch/crates/core_arch/src/x86/avx512bw.rs | 39 +- stdarch/crates/core_arch/src/x86/avx512f.rs | 403 +++++-------------- 3 files changed, 111 insertions(+), 371 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx2.rs b/stdarch/crates/core_arch/src/x86/avx2.rs index 1597e5af07332..20a3f7a29788b 100644 --- a/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/stdarch/crates/core_arch/src/x86/avx2.rs @@ -2778,7 +2778,7 @@ pub fn _mm256_bslli_epi128(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsllvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) } + unsafe { transmute(simd_shl(a.as_u32x4(), count.as_u32x4())) } } /// Shifts packed 32-bit integers in `a` left by the amount @@ -2791,7 +2791,7 @@ pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpsllvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) } + unsafe { transmute(simd_shl(a.as_u32x8(), count.as_u32x8())) } } /// Shifts packed 64-bit integers in `a` left by the amount @@ -2804,7 +2804,7 @@ pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsllvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) } + unsafe { transmute(simd_shl(a.as_u64x2(), count.as_u64x2())) } } /// Shifts packed 64-bit integers in `a` left by the amount @@ -2817,7 +2817,7 @@ pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpsllvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) } + unsafe { transmute(simd_shl(a.as_u64x4(), count.as_u64x4())) } } /// Shifts packed 16-bit integers in `a` right by `count` while @@ -2881,7 +2881,7 @@ pub fn _mm256_srai_epi32(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsravd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) } + unsafe { transmute(simd_shr(a.as_i32x4(), count.as_i32x4())) } } /// Shifts packed 32-bit integers in `a` right by the amount specified by the @@ -2893,7 +2893,7 @@ pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpsravd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) } + unsafe { transmute(simd_shr(a.as_i32x8(), count.as_i32x8())) } } /// 
Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. @@ -3076,7 +3076,7 @@ pub fn _mm256_srli_epi64(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsrlvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) } + unsafe { transmute(simd_shr(a.as_u32x4(), count.as_u32x4())) } } /// Shifts packed 32-bit integers in `a` right by the amount specified by @@ -3088,7 +3088,7 @@ pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpsrlvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) } + unsafe { transmute(simd_shr(a.as_u32x8(), count.as_u32x8())) } } /// Shifts packed 64-bit integers in `a` right by the amount specified by @@ -3100,7 +3100,7 @@ pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsrlvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) } + unsafe { transmute(simd_shr(a.as_u64x2(), count.as_u64x2())) } } /// Shifts packed 64-bit integers in `a` right by the amount specified by @@ -3112,7 +3112,7 @@ pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpsrlvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) } + unsafe { transmute(simd_shr(a.as_u64x4(), count.as_u64x4())) } } /// Load 256-bits of integer data from memory into dst using a non-temporal memory hint. 
mem_addr @@ -3687,36 +3687,16 @@ unsafe extern "C" { fn pslld(a: i32x8, count: i32x4) -> i32x8; #[link_name = "llvm.x86.avx2.psll.q"] fn psllq(a: i64x4, count: i64x2) -> i64x4; - #[link_name = "llvm.x86.avx2.psllv.d"] - fn psllvd(a: i32x4, count: i32x4) -> i32x4; - #[link_name = "llvm.x86.avx2.psllv.d.256"] - fn psllvd256(a: i32x8, count: i32x8) -> i32x8; - #[link_name = "llvm.x86.avx2.psllv.q"] - fn psllvq(a: i64x2, count: i64x2) -> i64x2; - #[link_name = "llvm.x86.avx2.psllv.q.256"] - fn psllvq256(a: i64x4, count: i64x4) -> i64x4; #[link_name = "llvm.x86.avx2.psra.w"] fn psraw(a: i16x16, count: i16x8) -> i16x16; #[link_name = "llvm.x86.avx2.psra.d"] fn psrad(a: i32x8, count: i32x4) -> i32x8; - #[link_name = "llvm.x86.avx2.psrav.d"] - fn psravd(a: i32x4, count: i32x4) -> i32x4; - #[link_name = "llvm.x86.avx2.psrav.d.256"] - fn psravd256(a: i32x8, count: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.psrl.w"] fn psrlw(a: i16x16, count: i16x8) -> i16x16; #[link_name = "llvm.x86.avx2.psrl.d"] fn psrld(a: i32x8, count: i32x4) -> i32x8; #[link_name = "llvm.x86.avx2.psrl.q"] fn psrlq(a: i64x4, count: i64x2) -> i64x4; - #[link_name = "llvm.x86.avx2.psrlv.d"] - fn psrlvd(a: i32x4, count: i32x4) -> i32x4; - #[link_name = "llvm.x86.avx2.psrlv.d.256"] - fn psrlvd256(a: i32x8, count: i32x8) -> i32x8; - #[link_name = "llvm.x86.avx2.psrlv.q"] - fn psrlvq(a: i64x2, count: i64x2) -> i64x2; - #[link_name = "llvm.x86.avx2.psrlv.q.256"] - fn psrlvq256(a: i64x4, count: i64x4) -> i64x4; #[link_name = "llvm.x86.avx2.pshuf.b"] fn pshufb(a: u8x32, b: u8x32) -> u8x32; #[link_name = "llvm.x86.avx2.permd"] diff --git a/stdarch/crates/core_arch/src/x86/avx512bw.rs b/stdarch/crates/core_arch/src/x86/avx512bw.rs index ad48cfd686c6d..1771f196590c0 100644 --- a/stdarch/crates/core_arch/src/x86/avx512bw.rs +++ b/stdarch/crates/core_arch/src/x86/avx512bw.rs @@ -6852,7 +6852,7 @@ pub fn _mm_maskz_slli_epi16(k: __mmask8, a: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllvw))] pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) } + unsafe { transmute(simd_shl(a.as_u16x32(), count.as_u16x32())) } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6891,7 +6891,7 @@ pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllvw))] pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) } + unsafe { transmute(simd_shl(a.as_u16x16(), count.as_u16x16())) } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
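All of the `_mask_`/`_maskz_` wrappers around these shifts follow one selection rule; an illustrative scalar model of the write-mask form (the `maskz` variants substitute zeros for `src`):

```rust
// Lane i comes from the shifted value when bit i of k is set,
// otherwise it is copied from src.
fn mask_sllv_epi16(src: [u16; 8], k: u8, a: [u16; 8], count: [u16; 8]) -> [u16; 8] {
    let mut r = src;
    for i in 0..8 {
        if (k >> i) & 1 == 1 {
            r[i] = if count[i] < 16 { a[i] << count[i] } else { 0 };
        }
    }
    r
}
```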
@@ -6930,7 +6930,7 @@ pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllvw))] pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) } + unsafe { transmute(simd_shl(a.as_u16x8(), count.as_u16x8())) } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7188,7 +7188,7 @@ pub fn _mm_maskz_srli_epi16(k: __mmask8, a: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlvw))] pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) } + unsafe { transmute(simd_shr(a.as_u16x32(), count.as_u16x32())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7227,7 +7227,7 @@ pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlvw))] pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) } + unsafe { transmute(simd_shr(a.as_u16x16(), count.as_u16x16())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7266,7 +7266,7 @@ pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlvw))] pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) } + unsafe { transmute(simd_shr(a.as_u16x8(), count.as_u16x8())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7511,7 +7511,7 @@ pub fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravw))] pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) } + unsafe { transmute(simd_shr(a.as_i16x32(), count.as_i16x32())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
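The lane types in these rewrites are deliberate: with `simd_shr`, as with Rust's `>>`, the signedness of the element type selects the shift kind:

```rust
// Logical shift on unsigned lanes fills with zeros; arithmetic shift on
// signed lanes copies the sign bit. (Counts must stay below the lane width.)
fn srl16(a: u16, n: u32) -> u16 { a >> n } // srlv-style
fn sra16(a: i16, n: u32) -> i16 { a >> n } // srav-style
```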
@@ -7550,7 +7550,7 @@ pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
 pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
-    unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) }
+    unsafe { transmute(simd_shr(a.as_i16x16(), count.as_i16x16())) }
 }

 /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7589,7 +7589,7 @@ pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
 pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) }
+    unsafe { transmute(simd_shr(a.as_i16x8(), count.as_i16x8())) }
 }

 /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -11645,33 +11645,12 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.psll.w.512"]
     fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

-    #[link_name = "llvm.x86.avx512.psllv.w.512"]
-    fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
-    #[link_name = "llvm.x86.avx512.psllv.w.256"]
-    fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx512.psllv.w.128"]
-    fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;
-
     #[link_name = "llvm.x86.avx512.psrl.w.512"]
     fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

-    #[link_name = "llvm.x86.avx512.psrlv.w.512"]
-    fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
-    #[link_name = "llvm.x86.avx512.psrlv.w.256"]
-    fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx512.psrlv.w.128"]
-    fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;
-
     #[link_name = "llvm.x86.avx512.psra.w.512"]
     fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

-    #[link_name = "llvm.x86.avx512.psrav.w.512"]
-    fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
-    #[link_name = "llvm.x86.avx512.psrav.w.256"]
-    fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx512.psrav.w.128"]
-    fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;
-
     #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
     fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
     #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
diff --git a/stdarch/crates/core_arch/src/x86/avx512f.rs b/stdarch/crates/core_arch/src/x86/avx512f.rs
index 52c6a11a43f0e..002534a65de52 100644
--- a/stdarch/crates/core_arch/src/x86/avx512f.rs
+++ b/stdarch/crates/core_arch/src/x86/avx512f.rs
@@ -19077,12 +19077,8 @@ pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        let a = a.as_i32x16();
-        let r = vprold(a, IMM8);
-        transmute(r)
-    }
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm512_rolv_epi32(a, _mm512_set1_epi32(IMM8))
 }

 /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
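Scalar model of the per-lane rotation these functions now delegate to the `rolv` forms (the count is taken modulo the 32-bit lane width, which is why splatting `IMM8` is equivalent to the old immediate form):

```rust
// Each 32-bit lane rotates left by imm8 modulo the lane width.
fn rol_epi32_lane(x: u32, imm8: u32) -> u32 {
    x.rotate_left(imm8 % 32)
}
```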
not set). @@ -19094,12 +19090,8 @@ pub fn _mm512_rol_epi32(a: __m512i) -> __m512i { #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(3)] pub fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprold(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x16())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_rolv_epi32(src, k, a, _mm512_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19111,12 +19103,8 @@ pub fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(2)] pub fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprold(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_maskz_rolv_epi32(k, a, _mm512_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -19128,12 +19116,8 @@ pub fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i) -> __m5 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] pub fn _mm256_rol_epi32(a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprold256(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_rolv_epi32(a, _mm256_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19145,12 +19129,8 @@ pub fn _mm256_rol_epi32(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(3)] pub fn _mm256_mask_rol_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprold256(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x8())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_mask_rolv_epi32(src, k, a, _mm256_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19162,12 +19142,8 @@ pub fn _mm256_mask_rol_epi32(src: __m256i, k: __mmask8, a: __m2 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(2)] pub fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprold256(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_maskz_rolv_epi32(k, a, _mm256_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
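The rewritten immediate rotates are thin wrappers: splat `IMM8` into every lane, then defer to the variable-rotate intrinsic. What a single 32-bit lane computes can be sketched with Rust's built-in rotate (a model, not the stdarch code; the hardware takes the count modulo the lane width):

```rust
fn rol32_lane(a: u32, imm8: u32) -> u32 {
    a.rotate_left(imm8 % 32) // VPROLD only looks at the count modulo 32
}
```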
@@ -19179,12 +19155,8 @@ pub fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i) -> __m25 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] pub fn _mm_rol_epi32(a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprold128(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_rolv_epi32(a, _mm_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19196,12 +19168,8 @@ pub fn _mm_rol_epi32(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(3)] pub fn _mm_mask_rol_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprold128(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x4())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_rolv_epi32(src, k, a, _mm_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19213,12 +19181,8 @@ pub fn _mm_mask_rol_epi32(src: __m128i, k: __mmask8, a: __m128i #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(2)] pub fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprold128(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_maskz_rolv_epi32(k, a, _mm_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -19230,12 +19194,8 @@ pub fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i) -> __m128i #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] pub fn _mm512_ror_epi32(a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprord(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_rorv_epi32(a, _mm512_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19247,12 +19207,8 @@ pub fn _mm512_ror_epi32(a: __m512i) -> __m512i { #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(3)] pub fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprord(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x16())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_rorv_epi32(src, k, a, _mm512_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -19264,12 +19220,8 @@ pub fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(2)] pub fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprord(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_maskz_rorv_epi32(k, a, _mm512_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -19281,12 +19233,8 @@ pub fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i) -> __m5 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] pub fn _mm256_ror_epi32(a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprord256(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_rorv_epi32(a, _mm256_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19298,12 +19246,8 @@ pub fn _mm256_ror_epi32(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(3)] pub fn _mm256_mask_ror_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprord256(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x8())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_mask_rorv_epi32(src, k, a, _mm256_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19315,12 +19259,8 @@ pub fn _mm256_mask_ror_epi32(src: __m256i, k: __mmask8, a: __m2 #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(2)] pub fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprord256(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_maskz_rorv_epi32(k, a, _mm256_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -19332,12 +19272,8 @@ pub fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i) -> __m25 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] pub fn _mm_ror_epi32(a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprord128(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_rorv_epi32(a, _mm_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -19349,12 +19285,8 @@ pub fn _mm_ror_epi32(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(3)] pub fn _mm_mask_ror_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprord128(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x4())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_rorv_epi32(src, k, a, _mm_set1_epi32(IMM8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19366,12 +19298,8 @@ pub fn _mm_mask_ror_epi32(src: __m128i, k: __mmask8, a: __m128i #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(2)] pub fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprord128(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_maskz_rorv_epi32(k, a, _mm_set1_epi32(IMM8)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -19383,12 +19311,8 @@ pub fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i) -> __m128i #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] pub fn _mm512_rol_epi64(a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprolq(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_rolv_epi64(a, _mm512_set1_epi64(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19400,12 +19324,8 @@ pub fn _mm512_rol_epi64(a: __m512i) -> __m512i { #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] pub fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprolq(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x8())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_rolv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19417,12 +19337,8 @@ pub fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m5 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] pub fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprolq(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x8::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_maskz_rolv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
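The 64-bit wrappers follow the same shape, widening the immediate with `as i64` before splatting; the narrower variants below use Intel's `_mm256_set1_epi64x`/`_mm_set1_epi64x` spelling rather than `set1_epi64`. One lane, as a scalar sketch:

```rust
fn rol64_lane(a: u64, imm8: u32) -> u64 {
    a.rotate_left(imm8 % 64) // the rotate count is taken modulo 64
}
```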
@@ -19434,12 +19350,8 @@ pub fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i) -> __m51 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] pub fn _mm256_rol_epi64(a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprolq256(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_rolv_epi64(a, _mm256_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19451,12 +19363,8 @@ pub fn _mm256_rol_epi64(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] pub fn _mm256_mask_rol_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprolq256(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x4())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_mask_rolv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19468,12 +19376,8 @@ pub fn _mm256_mask_rol_epi64(src: __m256i, k: __mmask8, a: __m2 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] pub fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprolq256(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x4::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_maskz_rolv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -19485,12 +19389,8 @@ pub fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i) -> __m25 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] pub fn _mm_rol_epi64(a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprolq128(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_rolv_epi64(a, _mm_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19502,12 +19402,8 @@ pub fn _mm_rol_epi64(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] pub fn _mm_mask_rol_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprolq128(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x2())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_rolv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -19519,12 +19415,8 @@ pub fn _mm_mask_rol_epi64(src: __m128i, k: __mmask8, a: __m128i #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] pub fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprolq128(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x2::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_maskz_rolv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -19536,12 +19428,8 @@ pub fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i) -> __m128i #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(1)] pub fn _mm512_ror_epi64(a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprorq(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_rorv_epi64(a, _mm512_set1_epi64(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19553,12 +19441,8 @@ pub fn _mm512_ror_epi64(a: __m512i) -> __m512i { #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(3)] pub fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprorq(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x8())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_mask_rorv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19570,12 +19454,8 @@ pub fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m5 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(2)] pub fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i) -> __m512i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprorq(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x8::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm512_maskz_rorv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -19587,12 +19467,8 @@ pub fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i) -> __m51 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(1)] pub fn _mm256_ror_epi64(a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprorq256(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_rorv_epi64(a, _mm256_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -19604,12 +19480,8 @@ pub fn _mm256_ror_epi64(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(3)] pub fn _mm256_mask_ror_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprorq256(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x4())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_mask_rorv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19621,12 +19493,8 @@ pub fn _mm256_mask_ror_epi64(src: __m256i, k: __mmask8, a: __m2 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(2)] pub fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i) -> __m256i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprorq256(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x4::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm256_maskz_rorv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -19638,12 +19506,8 @@ pub fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i) -> __m25 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(1)] pub fn _mm_ror_epi64(a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprorq128(a, IMM8); - transmute(r) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_rorv_epi64(a, _mm_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19655,12 +19519,8 @@ pub fn _mm_ror_epi64(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(3)] pub fn _mm_mask_ror_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprorq128(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x2())) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_mask_rorv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19672,12 +19532,8 @@ pub fn _mm_mask_ror_epi64(src: __m128i, k: __mmask8, a: __m128i #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(2)] pub fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i) -> __m128i { - unsafe { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprorq128(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x2::ZERO)) - } + static_assert_uimm_bits!(IMM8, 8); + _mm_maskz_rorv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64)) } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. 
@@ -21084,7 +20940,7 @@ pub fn _mm_maskz_srai_epi64(k: __mmask8, a: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravd))] pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) } + unsafe { transmute(simd_shr(a.as_i32x16(), count.as_i32x16())) } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21179,7 +21035,7 @@ pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravq))] pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) } + unsafe { transmute(simd_shr(a.as_i64x8(), count.as_i64x8())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21218,7 +21074,7 @@ pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m51 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravq))] pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) } + unsafe { transmute(simd_shr(a.as_i64x4(), count.as_i64x4())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21257,7 +21113,7 @@ pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m25 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravq))] pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) } + unsafe { transmute(simd_shr(a.as_i64x2(), count.as_i64x2())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21296,7 +21152,7 @@ pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvd))] pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) } + unsafe { transmute(simd_funnel_shl(a.as_u32x16(), a.as_u32x16(), b.as_u32x16())) } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -21335,7 +21191,7 @@ pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvd))] pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) } + unsafe { transmute(simd_funnel_shl(a.as_u32x8(), a.as_u32x8(), b.as_u32x8())) } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21374,7 +21230,7 @@ pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvd))] pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) } + unsafe { transmute(simd_funnel_shl(a.as_u32x4(), a.as_u32x4(), b.as_u32x4())) } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21413,7 +21269,7 @@ pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvd))] pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) } + unsafe { transmute(simd_funnel_shr(a.as_u32x16(), a.as_u32x16(), b.as_u32x16())) } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21452,7 +21308,7 @@ pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvd))] pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) } + unsafe { transmute(simd_funnel_shr(a.as_u32x8(), a.as_u32x8(), b.as_u32x8())) } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21491,7 +21347,7 @@ pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvd))] pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) } + unsafe { transmute(simd_funnel_shr(a.as_u32x4(), a.as_u32x4(), b.as_u32x4())) } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -21530,7 +21386,7 @@ pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvq))] pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) } + unsafe { transmute(simd_funnel_shl(a.as_u64x8(), a.as_u64x8(), b.as_u64x8())) } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21569,7 +21425,7 @@ pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvq))] pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) } + unsafe { transmute(simd_funnel_shl(a.as_u64x4(), a.as_u64x4(), b.as_u64x4())) } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21608,7 +21464,7 @@ pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvq))] pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) } + unsafe { transmute(simd_funnel_shl(a.as_u64x2(), a.as_u64x2(), b.as_u64x2())) } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21647,7 +21503,7 @@ pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvq))] pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) } + unsafe { transmute(simd_funnel_shr(a.as_u64x8(), a.as_u64x8(), b.as_u64x8())) } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21686,7 +21542,7 @@ pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvq))] pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) } + unsafe { transmute(simd_funnel_shr(a.as_u64x4(), a.as_u64x4(), b.as_u64x4())) } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -21725,7 +21581,7 @@ pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvq))] pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) } + unsafe { transmute(simd_funnel_shr(a.as_u64x2(), a.as_u64x2(), b.as_u64x2())) } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21764,7 +21620,7 @@ pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllvd))] pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) } + unsafe { transmute(simd_shl(a.as_u32x16(), count.as_u32x16())) } } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21859,7 +21715,7 @@ pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlvd))] pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) } + unsafe { transmute(simd_shr(a.as_u32x16(), count.as_u32x16())) } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21954,7 +21810,7 @@ pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllvq))] pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) } + unsafe { transmute(simd_shl(a.as_u64x8(), count.as_u64x8())) } } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22049,7 +21905,7 @@ pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlvq))] pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) } + unsafe { transmute(simd_shr(a.as_u64x8(), count.as_u64x8())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -42902,71 +42758,6 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"] fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8; - #[link_name = "llvm.x86.avx512.mask.prol.d.512"] - fn vprold(a: i32x16, i8: i32) -> i32x16; - #[link_name = "llvm.x86.avx512.mask.prol.d.256"] - fn vprold256(a: i32x8, i8: i32) -> i32x8; - #[link_name = "llvm.x86.avx512.mask.prol.d.128"] - fn vprold128(a: i32x4, i8: i32) -> i32x4; - - #[link_name = "llvm.x86.avx512.mask.pror.d.512"] - fn vprord(a: i32x16, i8: i32) -> i32x16; - #[link_name = "llvm.x86.avx512.mask.pror.d.256"] - fn vprord256(a: i32x8, i8: i32) -> i32x8; - #[link_name = "llvm.x86.avx512.mask.pror.d.128"] - fn vprord128(a: i32x4, i8: i32) -> i32x4; - - #[link_name = "llvm.x86.avx512.mask.prol.q.512"] - fn vprolq(a: i64x8, i8: i32) -> i64x8; - #[link_name = "llvm.x86.avx512.mask.prol.q.256"] - fn vprolq256(a: i64x4, i8: i32) -> i64x4; - #[link_name = "llvm.x86.avx512.mask.prol.q.128"] - fn vprolq128(a: i64x2, i8: i32) -> i64x2; - - #[link_name = "llvm.x86.avx512.mask.pror.q.512"] - fn vprorq(a: i64x8, i8: i32) -> i64x8; - #[link_name = "llvm.x86.avx512.mask.pror.q.256"] - fn vprorq256(a: i64x4, i8: i32) -> i64x4; - #[link_name = "llvm.x86.avx512.mask.pror.q.128"] - fn vprorq128(a: i64x2, i8: i32) -> i64x2; - - #[link_name = "llvm.x86.avx512.mask.prolv.d.512"] - fn vprolvd(a: i32x16, b: i32x16) -> i32x16; - #[link_name = "llvm.x86.avx512.mask.prolv.d.256"] - fn vprolvd256(a: i32x8, b: i32x8) -> i32x8; - #[link_name = "llvm.x86.avx512.mask.prolv.d.128"] - fn vprolvd128(a: i32x4, b: i32x4) -> i32x4; - - #[link_name = "llvm.x86.avx512.mask.prorv.d.512"] - fn vprorvd(a: i32x16, b: i32x16) -> i32x16; - #[link_name = "llvm.x86.avx512.mask.prorv.d.256"] - fn vprorvd256(a: i32x8, b: i32x8) -> i32x8; - #[link_name = "llvm.x86.avx512.mask.prorv.d.128"] - fn vprorvd128(a: i32x4, b: i32x4) -> i32x4; - - #[link_name = "llvm.x86.avx512.mask.prolv.q.512"] - fn vprolvq(a: i64x8, b: i64x8) -> i64x8; - #[link_name = "llvm.x86.avx512.mask.prolv.q.256"] - fn vprolvq256(a: i64x4, b: i64x4) -> i64x4; - #[link_name = "llvm.x86.avx512.mask.prolv.q.128"] - fn vprolvq128(a: i64x2, b: i64x2) -> i64x2; - - #[link_name = "llvm.x86.avx512.mask.prorv.q.512"] - fn vprorvq(a: i64x8, b: i64x8) -> i64x8; - #[link_name = "llvm.x86.avx512.mask.prorv.q.256"] - fn vprorvq256(a: i64x4, b: i64x4) -> i64x4; - #[link_name = "llvm.x86.avx512.mask.prorv.q.128"] - fn vprorvq128(a: i64x2, b: i64x2) -> i64x2; - - #[link_name = "llvm.x86.avx512.psllv.d.512"] - fn vpsllvd(a: i32x16, b: i32x16) -> i32x16; - #[link_name = "llvm.x86.avx512.psrlv.d.512"] - fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16; - #[link_name = "llvm.x86.avx512.psllv.q.512"] - fn vpsllvq(a: i64x8, b: i64x8) -> i64x8; - #[link_name = "llvm.x86.avx512.psrlv.q.512"] - fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8; - #[link_name = "llvm.x86.avx512.psll.d.512"] fn vpslld(a: i32x16, count: i32x4) -> i32x16; #[link_name = "llvm.x86.avx512.psrl.d.512"] @@ -42986,16 +42777,6 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.psra.q.128"] fn vpsraq128(a: i64x2, count: i64x2) -> i64x2; - #[link_name = "llvm.x86.avx512.psrav.d.512"] - fn vpsravd(a: i32x16, count: i32x16) -> i32x16; - - #[link_name = "llvm.x86.avx512.psrav.q.512"] - fn vpsravq(a: i64x8, count: i64x8) -> i64x8; - #[link_name = "llvm.x86.avx512.psrav.q.256"] - fn vpsravq256(a: i64x4, count: i64x4) -> i64x4; - #[link_name = "llvm.x86.avx512.psrav.q.128"] - fn vpsravq128(a: i64x2, count: i64x2) -> i64x2; - #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"] fn 
vpermilps(a: f32x16, b: i32x16) -> f32x16; #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"] From 07869dc2fd7946120f7ddd0a66d87d1ee551eab6 Mon Sep 17 00:00:00 2001 From: sayantn Date: Wed, 1 Oct 2025 07:23:10 +0530 Subject: [PATCH 029/358] Use SIMD intrinsics for f16 intrinsics --- .../crates/core_arch/src/x86/avx512fp16.rs | 92 ++++++++++++++++--- stdarch/crates/core_arch/src/x86/f16c.rs | 16 ++-- 2 files changed, 90 insertions(+), 18 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx512fp16.rs b/stdarch/crates/core_arch/src/x86/avx512fp16.rs index a86fc7199b83c..a8cf1f246af1c 100644 --- a/stdarch/crates/core_arch/src/x86/avx512fp16.rs +++ b/stdarch/crates/core_arch/src/x86/avx512fp16.rs @@ -1615,7 +1615,7 @@ pub fn _mm_maskz_add_round_sh(k: __mmask8, a: __m128h, b: _ #[cfg_attr(test, assert_instr(vaddsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h { - _mm_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) + unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) + _mm_cvtsh_h(b)) } } /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the @@ -1628,7 +1628,16 @@ pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h { #[cfg_attr(test, assert_instr(vaddsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - _mm_mask_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) + unsafe { + let extractsrc: f16 = simd_extract!(src, 0); + let mut add: f16 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + add = extracta + extractb; + } + simd_insert!(a, 0, add) + } } /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the @@ -1641,7 +1650,15 @@ pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m #[cfg_attr(test, assert_instr(vaddsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - _mm_maskz_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) + unsafe { + let mut add: f16 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + add = extracta + extractb; + } + simd_insert!(a, 0, add) + } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. 
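All of the masked scalar forms in this commit share one shape: compute on lane 0 only when bit 0 of `k` is set, fall back to `src` (write-mask) or to zero (zero-mask) otherwise, and carry lanes 1..7 over from `a` unchanged; the sub/mul/div families below repeat it verbatim. A scalar model of that shape, using `f32` as a stand-in since the `f16` primitive is still unstable (a sketch only):

```rust
fn mask_op_lane0(src: f32, k: u8, a: f32, b: f32) -> f32 {
    if k & 1 != 0 { a + b } else { src } // write-mask: keep `src` when bit 0 is clear
}

fn maskz_op_lane0(k: u8, a: f32, b: f32) -> f32 {
    if k & 1 != 0 { a + b } else { 0.0 } // zero-mask: zero when bit 0 is clear
}
```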
@@ -1927,7 +1944,7 @@ pub fn _mm_maskz_sub_round_sh(k: __mmask8, a: __m128h, b: _ #[cfg_attr(test, assert_instr(vsubsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h { - _mm_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) + unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) - _mm_cvtsh_h(b)) } } /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the @@ -1940,7 +1957,16 @@ pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h { #[cfg_attr(test, assert_instr(vsubsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - _mm_mask_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) + unsafe { + let extractsrc: f16 = simd_extract!(src, 0); + let mut add: f16 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + add = extracta - extractb; + } + simd_insert!(a, 0, add) + } } /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the @@ -1953,7 +1979,15 @@ pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m #[cfg_attr(test, assert_instr(vsubsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - _mm_maskz_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) + unsafe { + let mut add: f16 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + add = extracta - extractb; + } + simd_insert!(a, 0, add) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. 
@@ -2239,7 +2273,7 @@ pub fn _mm_maskz_mul_round_sh(k: __mmask8, a: __m128h, b: _ #[cfg_attr(test, assert_instr(vmulsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h { - _mm_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) + unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) * _mm_cvtsh_h(b)) } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the @@ -2252,7 +2286,16 @@ pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h { #[cfg_attr(test, assert_instr(vmulsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - _mm_mask_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) + unsafe { + let extractsrc: f16 = simd_extract!(src, 0); + let mut add: f16 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + add = extracta * extractb; + } + simd_insert!(a, 0, add) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the @@ -2265,7 +2308,15 @@ pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m #[cfg_attr(test, assert_instr(vmulsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - _mm_maskz_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) + unsafe { + let mut add: f16 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + add = extracta * extractb; + } + simd_insert!(a, 0, add) + } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. 
@@ -2551,7 +2602,7 @@ pub fn _mm_maskz_div_round_sh(k: __mmask8, a: __m128h, b: _ #[cfg_attr(test, assert_instr(vdivsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h { - _mm_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) + unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) / _mm_cvtsh_h(b)) } } /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the @@ -2564,7 +2615,16 @@ pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h { #[cfg_attr(test, assert_instr(vdivsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - _mm_mask_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) + unsafe { + let extractsrc: f16 = simd_extract!(src, 0); + let mut add: f16 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + add = extracta / extractb; + } + simd_insert!(a, 0, add) + } } /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the @@ -2577,7 +2637,15 @@ pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m #[cfg_attr(test, assert_instr(vdivsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - _mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) + unsafe { + let mut add: f16 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + add = extracta / extractb; + } + simd_insert!(a, 0, add) + } } /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is diff --git a/stdarch/crates/core_arch/src/x86/f16c.rs b/stdarch/crates/core_arch/src/x86/f16c.rs index 7686b317d4d49..519cc38294a36 100644 --- a/stdarch/crates/core_arch/src/x86/f16c.rs +++ b/stdarch/crates/core_arch/src/x86/f16c.rs @@ -3,16 +3,13 @@ //! 
[F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769 use crate::core_arch::{simd::*, x86::*}; +use crate::intrinsics::simd::*; #[cfg(test)] use stdarch_test::assert_instr; #[allow(improper_ctypes)] unsafe extern "unadjusted" { - #[link_name = "llvm.x86.vcvtph2ps.128"] - fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4; - #[link_name = "llvm.x86.vcvtph2ps.256"] - fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8; #[link_name = "llvm.x86.vcvtps2ph.128"] fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8; #[link_name = "llvm.x86.vcvtps2ph.256"] @@ -29,7 +26,11 @@ unsafe extern "unadjusted" { #[cfg_attr(test, assert_instr("vcvtph2ps"))] #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] pub fn _mm_cvtph_ps(a: __m128i) -> __m128 { - unsafe { transmute(llvm_vcvtph2ps_128(transmute(a))) } + unsafe { + let a: f16x8 = transmute(a); + let a: f16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + simd_cast(a) + } } /// Converts the 8 x 16-bit half-precision float values in the 128-bit vector @@ -41,7 +42,10 @@ pub fn _mm_cvtph_ps(a: __m128i) -> __m128 { #[cfg_attr(test, assert_instr("vcvtph2ps"))] #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] pub fn _mm256_cvtph_ps(a: __m128i) -> __m256 { - unsafe { transmute(llvm_vcvtph2ps_256(transmute(a))) } + unsafe { + let a: f16x8 = transmute(a); + simd_cast(a) + } } /// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x From f939057b06e7c85003bfbb33e1cbfc1bd72d2443 Mon Sep 17 00:00:00 2001 From: sayantn Date: Wed, 1 Oct 2025 07:23:43 +0530 Subject: [PATCH 030/358] Use SIMD intrinsics for `cvtsi{,64}_{ss,sd}` intrinsics --- stdarch/crates/core_arch/src/x86/sse.rs | 4 +--- stdarch/crates/core_arch/src/x86/sse2.rs | 7 ++++--- stdarch/crates/core_arch/src/x86_64/sse.rs | 4 +--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/sse.rs b/stdarch/crates/core_arch/src/x86/sse.rs index 1eca66adc2c6a..c5c6dc26b5b62 100644 --- a/stdarch/crates/core_arch/src/x86/sse.rs +++ b/stdarch/crates/core_arch/src/x86/sse.rs @@ -882,7 +882,7 @@ pub fn _mm_cvtss_f32(a: __m128) -> f32 { #[cfg_attr(test, assert_instr(cvtsi2ss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 { - unsafe { cvtsi2ss(a, b) } + unsafe { simd_insert!(a, 0, b as f32) } } /// Alias for [`_mm_cvtsi32_ss`](fn._mm_cvtsi32_ss.html). 
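`_mm_cvtsi32_ss` now reduces to "convert, then replace lane 0"; the remaining three lanes of `a` pass through untouched. A scalar-array sketch of the same computation (illustrative model, not the stdarch code):

```rust
fn cvtsi32_ss_model(a: [f32; 4], b: i32) -> [f32; 4] {
    let mut r = a;
    // `as f32` rounds to nearest-even, which matches CVTSI2SS under the
    // default MXCSR rounding mode.
    r[0] = b as f32;
    r
}
```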
@@ -1989,8 +1989,6 @@ unsafe extern "C" { fn cvtss2si(a: __m128) -> i32; #[link_name = "llvm.x86.sse.cvttss2si"] fn cvttss2si(a: __m128) -> i32; - #[link_name = "llvm.x86.sse.cvtsi2ss"] - fn cvtsi2ss(a: __m128, b: i32) -> __m128; #[link_name = "llvm.x86.sse.sfence"] fn sfence(); #[link_name = "llvm.x86.sse.stmxcsr"] diff --git a/stdarch/crates/core_arch/src/x86/sse2.rs b/stdarch/crates/core_arch/src/x86/sse2.rs index 76945cfbbf229..c9530a237a7d0 100644 --- a/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/stdarch/crates/core_arch/src/x86/sse2.rs @@ -2422,7 +2422,10 @@ pub fn _mm_cvtsd_f64(a: __m128d) -> f64 { #[cfg_attr(test, assert_instr(cvtss2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { - unsafe { cvtss2sd(a, b) } + unsafe { + let elt: f32 = simd_extract!(b, 0); + simd_insert!(a, 0, elt as f64) + } } /// Converts packed double-precision (64-bit) floating-point elements in `a` to @@ -3118,8 +3121,6 @@ unsafe extern "C" { fn cvtsd2si(a: __m128d) -> i32; #[link_name = "llvm.x86.sse2.cvtsd2ss"] fn cvtsd2ss(a: __m128, b: __m128d) -> __m128; - #[link_name = "llvm.x86.sse2.cvtss2sd"] - fn cvtss2sd(a: __m128d, b: __m128) -> __m128d; #[link_name = "llvm.x86.sse2.cvttpd2dq"] fn cvttpd2dq(a: __m128d) -> i32x4; #[link_name = "llvm.x86.sse2.cvttsd2si"] diff --git a/stdarch/crates/core_arch/src/x86_64/sse.rs b/stdarch/crates/core_arch/src/x86_64/sse.rs index 863c3cd2e7012..6bd7ec83ec999 100644 --- a/stdarch/crates/core_arch/src/x86_64/sse.rs +++ b/stdarch/crates/core_arch/src/x86_64/sse.rs @@ -11,8 +11,6 @@ unsafe extern "C" { fn cvtss2si64(a: __m128) -> i64; #[link_name = "llvm.x86.sse.cvttss2si64"] fn cvttss2si64(a: __m128) -> i64; - #[link_name = "llvm.x86.sse.cvtsi642ss"] - fn cvtsi642ss(a: __m128, b: i64) -> __m128; } /// Converts the lowest 32 bit float in the input vector to a 64 bit integer. 
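`_mm_cvtss_sd` above is the same insert-into-lane-0 pattern with a widening float cast, which is exact since every `f32` value is representable as an `f64`. Sketch:

```rust
fn cvtss_sd_model(a: [f64; 2], b: [f32; 4]) -> [f64; 2] {
    [b[0] as f64, a[1]] // lane 1 of `a` passes through unchanged
}
```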
@@ -65,7 +63,7 @@ pub fn _mm_cvttss_si64(a: __m128) -> i64 { #[cfg_attr(test, assert_instr(cvtsi2ss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { - unsafe { cvtsi642ss(a, b) } + unsafe { simd_insert!(a, 0, b as f32) } } #[cfg(test)] From e7ebcec025db9de7b3f3057f60f57ae9f815eb7a Mon Sep 17 00:00:00 2001 From: sayantn Date: Wed, 1 Oct 2025 09:33:15 +0530 Subject: [PATCH 031/358] Use SIMD intrinsics for `vperm2` intrinsics --- stdarch/crates/core_arch/src/x86/avx.rs | 46 +++++++++++++++++++----- stdarch/crates/core_arch/src/x86/avx2.rs | 4 +-- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx.rs b/stdarch/crates/core_arch/src/x86/avx.rs index 34d3ff394bbc7..0f8d6b0fa2dcd 100644 --- a/stdarch/crates/core_arch/src/x86/avx.rs +++ b/stdarch/crates/core_arch/src/x86/avx.rs @@ -1234,7 +1234,10 @@ pub fn _mm_permute_pd<const IMM8: i32>(a: __m128d) -> __m128d { #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 8); - unsafe { vperm2f128ps256(a, b, IMM8 as i8) } + _mm256_castsi256_ps(_mm256_permute2f128_si256::<IMM8>( + _mm256_castps_si256(a), + _mm256_castps_si256(b), + )) } /// Shuffles 256 bits (composed of 4 packed double-precision (64-bit) @@ -1248,7 +1251,10 @@ pub fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 { #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d { static_assert_uimm_bits!(IMM8, 8); - unsafe { vperm2f128pd256(a, b, IMM8 as i8) } + _mm256_castsi256_pd(_mm256_permute2f128_si256::<IMM8>( _mm256_castpd_si256(a), _mm256_castpd_si256(b), )) } /// Shuffles 128-bits (composed of integer data) selected by `imm8` @@ -1262,7 +1268,35 @@ pub fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256 #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - unsafe { transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8)) } + const fn idx(imm8: i32, pos: u32) -> u32 { + let part = if pos < 2 { + imm8 & 0xf + } else { + (imm8 & 0xf0) >> 4 + }; + 2 * (part as u32 & 0b11) + (pos & 1) + } + const fn idx0(imm8: i32, pos: u32) -> u32 { + let part = if pos < 2 { + imm8 & 0xf + } else { + (imm8 & 0xf0) >> 4 + }; + if part & 0b1000 != 0 { 4 } else { pos } + } + unsafe { + let r = simd_shuffle!( + a.as_i64x4(), + b.as_i64x4(), + [idx(IMM8, 0), idx(IMM8, 1), idx(IMM8, 2), idx(IMM8, 3)] + ); + let r: i64x4 = simd_shuffle!( + r, + i64x4::ZERO, + [idx0(IMM8, 0), idx0(IMM8, 1), idx0(IMM8, 2), idx0(IMM8, 3)] + ); + r.as_m256i() + } } /// Broadcasts a single-precision (32-bit) floating-point element from memory @@ -3092,12 +3126,6 @@ unsafe extern "C" { fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d; #[link_name = "llvm.x86.avx.vpermilvar.pd"] fn vpermilpd(a: __m128d, b: i64x2) -> __m128d; - #[link_name = "llvm.x86.avx.vperm2f128.ps.256"] - fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256; - #[link_name = "llvm.x86.avx.vperm2f128.pd.256"] - fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d; - #[link_name = "llvm.x86.avx.vperm2f128.si.256"] - fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8; #[link_name = "llvm.x86.avx.maskload.pd.256"] fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d; #[link_name = "llvm.x86.avx.maskstore.pd.256"] fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d); diff --git a/stdarch/crates/core_arch/src/x86/avx2.rs
b/stdarch/crates/core_arch/src/x86/avx2.rs index 20a3f7a29788b..8be6629f7978b 100644 --- a/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/stdarch/crates/core_arch/src/x86/avx2.rs @@ -2330,7 +2330,7 @@ pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - unsafe { transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8)) } + _mm256_permute2f128_si256::<IMM8>(a, b) } /// Shuffles 64-bit floating-point elements in `a` across lanes using the @@ -3703,8 +3703,6 @@ unsafe extern "C" { fn permd(a: u32x8, b: u32x8) -> u32x8; #[link_name = "llvm.x86.avx2.permps"] fn permps(a: __m256, b: i32x8) -> __m256; - #[link_name = "llvm.x86.avx2.vperm2i128"] - fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4; #[link_name = "llvm.x86.avx2.gather.d.d"] fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4; #[link_name = "llvm.x86.avx2.gather.d.d.256"] From 65c80340880ece14e2cd59adf2c92cc86ec08403 Mon Sep 17 00:00:00 2001 From: sayantn Date: Wed, 1 Oct 2025 12:33:41 +0530 Subject: [PATCH 032/358] Use SIMD intrinsics for `test{z,c}` intrinsics --- stdarch/crates/core_arch/src/x86/avx.rs | 42 +++++++++++++---------- stdarch/crates/core_arch/src/x86/sse41.rs | 17 +++++---- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx.rs b/stdarch/crates/core_arch/src/x86/avx.rs index 0f8d6b0fa2dcd..c1bb897ce009d 100644 --- a/stdarch/crates/core_arch/src/x86/avx.rs +++ b/stdarch/crates/core_arch/src/x86/avx.rs @@ -1983,7 +1983,10 @@ pub fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { - unsafe { ptestz256(a.as_i64x4(), b.as_i64x4()) } + unsafe { + let r = simd_and(a.as_i64x4(), b.as_i64x4()); + (0i64 == simd_reduce_or(r)) as i32 + } } /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and @@ -1997,7 +2000,10 @@ pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { #[cfg_attr(test, assert_instr(vptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { - unsafe { ptestc256(a.as_i64x4(), b.as_i64x4()) } + unsafe { + let r = simd_and(simd_xor(a.as_i64x4(), i64x4::splat(!0)), b.as_i64x4()); + (0i64 == simd_reduce_or(r)) as i32 + } } /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and @@ -2081,7 +2087,10 @@ pub fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { - unsafe { vtestzpd(a, b) } + unsafe { + let r: i64x2 = simd_lt(transmute(_mm_and_pd(a, b)), i64x2::ZERO); + (0i64 == simd_reduce_or(r)) as i32 + } } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) @@ -2098,7 +2107,10 @@ pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { - unsafe { vtestcpd(a, b) } + unsafe { + let r: i64x2 = simd_lt(transmute(_mm_andnot_pd(a, b)), i64x2::ZERO); + (0i64 == simd_reduce_or(r)) as i32 + } } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) @@ -2185,7 +2197,10 @@ pub fn
_mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { - unsafe { vtestzps(a, b) } + unsafe { + let r: i32x4 = simd_lt(transmute(_mm_and_ps(a, b)), i32x4::ZERO); + (0i32 == simd_reduce_or(r)) as i32 + } } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) @@ -2202,7 +2217,10 @@ pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { - unsafe { vtestcps(a, b) } + unsafe { + let r: i32x4 = simd_lt(transmute(_mm_andnot_ps(a, b)), i32x4::ZERO); + (0i32 == simd_reduce_or(r)) as i32 + } } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) @@ -3148,10 +3166,6 @@ unsafe extern "C" { fn vrcpps(a: __m256) -> __m256; #[link_name = "llvm.x86.avx.rsqrt.ps.256"] fn vrsqrtps(a: __m256) -> __m256; - #[link_name = "llvm.x86.avx.ptestz.256"] - fn ptestz256(a: i64x4, b: i64x4) -> i32; - #[link_name = "llvm.x86.avx.ptestc.256"] - fn ptestc256(a: i64x4, b: i64x4) -> i32; #[link_name = "llvm.x86.avx.ptestnzc.256"] fn ptestnzc256(a: i64x4, b: i64x4) -> i32; #[link_name = "llvm.x86.avx.vtestz.pd.256"] @@ -3160,10 +3174,6 @@ unsafe extern "C" { fn vtestcpd256(a: __m256d, b: __m256d) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.pd.256"] fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32; - #[link_name = "llvm.x86.avx.vtestz.pd"] - fn vtestzpd(a: __m128d, b: __m128d) -> i32; - #[link_name = "llvm.x86.avx.vtestc.pd"] - fn vtestcpd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.pd"] fn vtestnzcpd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.avx.vtestz.ps.256"] @@ -3172,10 +3182,6 @@ unsafe extern "C" { fn vtestcps256(a: __m256, b: __m256) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.ps.256"] fn vtestnzcps256(a: __m256, b: __m256) -> i32; - #[link_name = "llvm.x86.avx.vtestz.ps"] - fn vtestzps(a: __m128, b: __m128) -> i32; - #[link_name = "llvm.x86.avx.vtestc.ps"] - fn vtestcps(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.ps"] fn vtestnzcps(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.avx.min.ps.256"] diff --git a/stdarch/crates/core_arch/src/x86/sse41.rs b/stdarch/crates/core_arch/src/x86/sse41.rs index 9aa200dfc07ab..f457c74aa9c17 100644 --- a/stdarch/crates/core_arch/src/x86/sse41.rs +++ b/stdarch/crates/core_arch/src/x86/sse41.rs @@ -1006,7 +1006,10 @@ pub fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { - unsafe { ptestz(a.as_i64x2(), mask.as_i64x2()) } + unsafe { + let r = simd_reduce_or(simd_and(a.as_i64x2(), mask.as_i64x2())); + (0i64 == r) as i32 + } } /// Tests whether the specified bits in a 128-bit integer vector are all @@ -1029,7 +1032,13 @@ pub fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { - unsafe { ptestc(a.as_i64x2(), mask.as_i64x2()) } + unsafe { + let r = simd_reduce_or(simd_and( + simd_xor(a.as_i64x2(), i64x2::splat(!0)), + mask.as_i64x2(), + )); + (0i64 == r) as i32 + } } /// Tests whether the specified bits in a 128-bit integer vector are @@ -1165,10 +1174,6 @@ unsafe extern "C" { fn phminposuw(a: 
u16x8) -> u16x8; #[link_name = "llvm.x86.sse41.mpsadbw"] fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8; - #[link_name = "llvm.x86.sse41.ptestz"] - fn ptestz(a: i64x2, mask: i64x2) -> i32; - #[link_name = "llvm.x86.sse41.ptestc"] - fn ptestc(a: i64x2, mask: i64x2) -> i32; #[link_name = "llvm.x86.sse41.ptestnzc"] fn ptestnzc(a: i64x2, mask: i64x2) -> i32; } From 4fff554601ef319b9e786feae0307f438bb66cb2 Mon Sep 17 00:00:00 2001 From: Kivooeo Date: Wed, 1 Oct 2025 21:22:53 +0000 Subject: [PATCH 033/358] extended doc comment --- alloc/src/collections/btree/map.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/alloc/src/collections/btree/map.rs b/alloc/src/collections/btree/map.rs index adcb444d08c66..ca5b46c9b0fd0 100644 --- a/alloc/src/collections/btree/map.rs +++ b/alloc/src/collections/btree/map.rs @@ -1368,7 +1368,8 @@ impl BTreeMap { } /// Splits the collection into two at the given key. Returns everything after the given key, - /// including the key. + /// including the key. If the key is not present, the split will occur at the nearest + /// greater key, or return an empty map if no such key exists. /// /// # Examples /// From 987309d74b66a494861a3270dbca915a5f29de07 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Fri, 3 Oct 2025 02:20:50 +0530 Subject: [PATCH 034/358] fix: update the implementation of _kshiftri_mask32, _kshiftri_mask64, _kshiftli_mask32 and _kshiftli_mask64 to zero out when the amount of shift exceeds the bit length of the input argument. --- stdarch/crates/core_arch/src/x86/avx512bw.rs | 68 +++++++++++++++++--- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx512bw.rs b/stdarch/crates/core_arch/src/x86/avx512bw.rs index 1771f196590c0..094b89f3accf5 100644 --- a/stdarch/crates/core_arch/src/x86/avx512bw.rs +++ b/stdarch/crates/core_arch/src/x86/avx512bw.rs @@ -10440,7 +10440,7 @@ pub fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { #[rustc_legacy_const_generics(1)] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _kshiftli_mask32(a: __mmask32) -> __mmask32 { - a << COUNT + a.unbounded_shl(COUNT) } /// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k. @@ -10451,7 +10451,7 @@ pub fn _kshiftli_mask32(a: __mmask32) -> __mmask32 { #[rustc_legacy_const_generics(1)] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _kshiftli_mask64(a: __mmask64) -> __mmask64 { - a << COUNT + a.unbounded_shl(COUNT) } /// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k. @@ -10462,7 +10462,7 @@ pub fn _kshiftli_mask64(a: __mmask64) -> __mmask64 { #[rustc_legacy_const_generics(1)] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _kshiftri_mask32(a: __mmask32) -> __mmask32 { - a >> COUNT + a.unbounded_shr(COUNT) } /// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k. 
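The behavioural change above is easiest to see in scalar form. A minimal sketch using the same standard-library method the patch switches to (the values are arbitrary, not taken from the patch's tests):

```
fn main() {
    let a: u32 = 0b0110_1001;
    assert_eq!(a.unbounded_shr(3), 0b0000_1101);
    // `a >> 32` on a `u32` is an overflow (a panic in debug builds), while
    // `unbounded_shr` is defined to return zero once the count reaches the
    // bit width of the type:
    assert_eq!(a.unbounded_shr(32), 0);
    assert_eq!(a.unbounded_shr(33), 0);
}
```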
@@ -10473,7 +10473,7 @@ pub fn _kshiftri_mask32(a: __mmask32) -> __mmask32 { #[rustc_legacy_const_generics(1)] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _kshiftri_mask64(a: __mmask64) -> __mmask64 { - a >> COUNT + a.unbounded_shr(COUNT) } /// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst, @@ -20315,6 +20315,18 @@ mod tests { let r = _kshiftli_mask32::<3>(a); let e: __mmask32 = 0b0100101101001011_0100101101001000; assert_eq!(r, e); + + let r = _kshiftli_mask32::<31>(a); + let e: __mmask32 = 0b1000000000000000_0000000000000000; + assert_eq!(r, e); + + let r = _kshiftli_mask32::<32>(a); + let e: __mmask32 = 0b0000000000000000_0000000000000000; + assert_eq!(r, e); + + let r = _kshiftli_mask32::<33>(a); + let e: __mmask32 = 0b0000000000000000_0000000000000000; + assert_eq!(r, e); } #[simd_test(enable = "avx512bw")] @@ -20323,21 +20335,61 @@ mod tests { let r = _kshiftli_mask64::<3>(a); let e: __mmask64 = 0b0110100101101001011_0100101101001000; assert_eq!(r, e); + + let r = _kshiftli_mask64::<63>(a); + let e: __mmask64 = 0b1000000000000000_0000000000000000_0000000000000000_0000000000000000; + assert_eq!(r, e); + + let r = _kshiftli_mask64::<64>(a); + let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000; + assert_eq!(r, e); + + let r = _kshiftli_mask64::<65>(a); + let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000; + assert_eq!(r, e); } #[simd_test(enable = "avx512bw")] unsafe fn test_kshiftri_mask32() { - let a: __mmask32 = 0b0110100101101001_0110100101101001; + let a: __mmask32 = 0b1010100101101001_0110100101101001; let r = _kshiftri_mask32::<3>(a); - let e: __mmask32 = 0b0000110100101101_0010110100101101; + let e: __mmask32 = 0b0001010100101101_0010110100101101; + assert_eq!(r, e); + + let r = _kshiftri_mask32::<31>(a); + let e: __mmask32 = 0b0000000000000000_0000000000000001; + assert_eq!(r, e); + + let r = _kshiftri_mask32::<32>(a); + let e: __mmask32 = 0b0000000000000000_0000000000000000; + assert_eq!(r, e); + + let r = _kshiftri_mask32::<33>(a); + let e: __mmask32 = 0b0000000000000000_0000000000000000; assert_eq!(r, e); } #[simd_test(enable = "avx512bw")] unsafe fn test_kshiftri_mask64() { - let a: __mmask64 = 0b0110100101101001011_0100101101001000; + let a: __mmask64 = 0b1010100101101001011_0100101101001000; let r = _kshiftri_mask64::<3>(a); - let e: __mmask64 = 0b0110100101101001_0110100101101001; + let e: __mmask64 = 0b1010100101101001_0110100101101001; + assert_eq!(r, e); + + let r = _kshiftri_mask64::<34>(a); + let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000001; + assert_eq!(r, e); + + let r = _kshiftri_mask64::<35>(a); + let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000; + assert_eq!(r, e); + + let r = _kshiftri_mask64::<64>(a); + let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000; + assert_eq!(r, e); + + let r = _kshiftri_mask64::<65>(a); + let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000; assert_eq!(r, e); } From d710ff5c39209aac235df75925ed3cef00f29a11 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Fri, 3 Oct 2025 02:27:15 +0530 Subject: [PATCH 035/358] fix: update the implementation of _kshiftri_mask8 and _kshiftli_mask8 to zero out when the amount of shift exceeds the bit length of the input argument. 
--- stdarch/crates/core_arch/src/x86/avx512dq.rs | 32 +++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx512dq.rs b/stdarch/crates/core_arch/src/x86/avx512dq.rs index c90ec894f2174..afeb548a553c1 100644 --- a/stdarch/crates/core_arch/src/x86/avx512dq.rs +++ b/stdarch/crates/core_arch/src/x86/avx512dq.rs @@ -4602,7 +4602,7 @@ pub fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { #[rustc_legacy_const_generics(1)] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _kshiftli_mask8(a: __mmask8) -> __mmask8 { - a << COUNT + a.unbounded_shl(COUNT) } /// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst. @@ -4613,7 +4613,7 @@ pub fn _kshiftli_mask8(a: __mmask8) -> __mmask8 { #[rustc_legacy_const_generics(1)] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _kshiftri_mask8(a: __mmask8) -> __mmask8 { - a >> COUNT + a.unbounded_shr(COUNT) } /// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst, @@ -9856,13 +9856,37 @@ mod tests { let r = _kshiftli_mask8::<3>(a); let e: __mmask8 = 0b01001000; assert_eq!(r, e); + + let r = _kshiftli_mask8::<7>(a); + let e: __mmask8 = 0b10000000; + assert_eq!(r, e); + + let r = _kshiftli_mask8::<8>(a); + let e: __mmask8 = 0b00000000; + assert_eq!(r, e); + + let r = _kshiftli_mask8::<9>(a); + let e: __mmask8 = 0b00000000; + assert_eq!(r, e); } #[simd_test(enable = "avx512dq")] unsafe fn test_kshiftri_mask8() { - let a: __mmask8 = 0b01101001; + let a: __mmask8 = 0b10101001; let r = _kshiftri_mask8::<3>(a); - let e: __mmask8 = 0b00001101; + let e: __mmask8 = 0b00010101; + assert_eq!(r, e); + + let r = _kshiftri_mask8::<7>(a); + let e: __mmask8 = 0b00000001; + assert_eq!(r, e); + + let r = _kshiftri_mask8::<8>(a); + let e: __mmask8 = 0b00000000; + assert_eq!(r, e); + + let r = _kshiftri_mask8::<9>(a); + let e: __mmask8 = 0b00000000; assert_eq!(r, e); } From 68cb9a111d54d4bee3bca6f37ffd768ea132e9d7 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Fri, 3 Oct 2025 02:33:11 +0530 Subject: [PATCH 036/358] fix: update the implementation of _kshiftri_mask16 and _kshiftli_mask16 to zero out when the amount of shift exceeds 16. --- stdarch/crates/core_arch/src/x86/avx512f.rs | 32 ++++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx512f.rs b/stdarch/crates/core_arch/src/x86/avx512f.rs index 002534a65de52..001b877812041 100644 --- a/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -29090,7 +29090,7 @@ pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { #[rustc_legacy_const_generics(1)] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _kshiftli_mask16(a: __mmask16) -> __mmask16 { - a << COUNT + a.unbounded_shl(COUNT) } /// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst. 
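A small sketch of the 16-bit boundary cases this change is about, mirroring the counts 15, 16, and 17 that the new tests below exercise (the mask value here is made up for illustration):

```
fn main() {
    let a: u16 = 0b0110_1001_0011_1101;
    assert_eq!(a.unbounded_shl(15), 0b1000_0000_0000_0000); // only bit 0 survives
    assert_eq!(a.unbounded_shl(16), 0); // count equals the bit width: all zeros
    assert_eq!(a.unbounded_shl(17), 0); // count beyond the bit width: still zero
}
```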
@@ -29101,7 +29101,7 @@ pub fn _kshiftli_mask16(a: __mmask16) -> __mmask16 { #[rustc_legacy_const_generics(1)] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _kshiftri_mask16(a: __mmask16) -> __mmask16 { - a >> COUNT + a.unbounded_shr(COUNT) } /// Load 16-bit mask from memory @@ -56001,13 +56001,37 @@ mod tests { let r = _kshiftli_mask16::<3>(a); let e: __mmask16 = 0b1011011000011000; assert_eq!(r, e); + + let r = _kshiftli_mask16::<15>(a); + let e: __mmask16 = 0b1000000000000000; + assert_eq!(r, e); + + let r = _kshiftli_mask16::<16>(a); + let e: __mmask16 = 0b0000000000000000; + assert_eq!(r, e); + + let r = _kshiftli_mask16::<17>(a); + let e: __mmask16 = 0b0000000000000000; + assert_eq!(r, e); } #[simd_test(enable = "avx512dq")] unsafe fn test_kshiftri_mask16() { - let a: __mmask16 = 0b0110100100111100; + let a: __mmask16 = 0b1010100100111100; let r = _kshiftri_mask16::<3>(a); - let e: __mmask16 = 0b0000110100100111; + let e: __mmask16 = 0b0001010100100111; + assert_eq!(r, e); + + let r = _kshiftri_mask16::<15>(a); + let e: __mmask16 = 0b0000000000000001; + assert_eq!(r, e); + + let r = _kshiftri_mask16::<16>(a); + let e: __mmask16 = 0b0000000000000000; + assert_eq!(r, e); + + let r = _kshiftri_mask16::<17>(a); + let e: __mmask16 = 0b0000000000000000; assert_eq!(r, e); } From 2875d19175f10228fa4b1329b7a7a96895ce5914 Mon Sep 17 00:00:00 2001 From: sayantn Date: Fri, 3 Oct 2025 03:30:50 +0530 Subject: [PATCH 037/358] Revert uses of SIMD intrinsics for shifts --- stdarch/crates/core_arch/src/x86/avx2.rs | 40 +++++++++++++++----- stdarch/crates/core_arch/src/x86/avx512bw.rs | 39 ++++++++++++++----- stdarch/crates/core_arch/src/x86/avx512f.rs | 35 +++++++++++++---- 3 files changed, 87 insertions(+), 27 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx2.rs b/stdarch/crates/core_arch/src/x86/avx2.rs index 8be6629f7978b..91c10638e0bf0 100644 --- a/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/stdarch/crates/core_arch/src/x86/avx2.rs @@ -2778,7 +2778,7 @@ pub fn _mm256_bslli_epi128(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsllvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(simd_shl(a.as_u32x4(), count.as_u32x4())) } + unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) } } /// Shifts packed 32-bit integers in `a` left by the amount @@ -2791,7 +2791,7 @@ pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpsllvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(simd_shl(a.as_u32x8(), count.as_u32x8())) } + unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) } } /// Shifts packed 64-bit integers in `a` left by the amount @@ -2804,7 +2804,7 @@ pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsllvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(simd_shl(a.as_u64x2(), count.as_u64x2())) } + unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) } } /// Shifts packed 64-bit integers in `a` left by the amount @@ -2817,7 +2817,7 @@ pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpsllvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { - unsafe { 
transmute(simd_shl(a.as_u64x4(), count.as_u64x4())) } + unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) } } /// Shifts packed 16-bit integers in `a` right by `count` while @@ -2881,7 +2881,7 @@ pub fn _mm256_srai_epi32(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsravd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(simd_shr(a.as_i32x4(), count.as_i32x4())) } + unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) } } /// Shifts packed 32-bit integers in `a` right by the amount specified by the @@ -2893,7 +2893,7 @@ pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpsravd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(simd_shr(a.as_i32x8(), count.as_i32x8())) } + unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) } } /// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. @@ -3076,7 +3076,7 @@ pub fn _mm256_srli_epi64(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsrlvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(simd_shr(a.as_u32x4(), count.as_u32x4())) } + unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) } } /// Shifts packed 32-bit integers in `a` right by the amount specified by @@ -3088,7 +3088,7 @@ pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpsrlvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(simd_shr(a.as_u32x8(), count.as_u32x8())) } + unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) } } /// Shifts packed 64-bit integers in `a` right by the amount specified by @@ -3100,7 +3100,7 @@ pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsrlvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(simd_shr(a.as_u64x2(), count.as_u64x2())) } + unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) } } /// Shifts packed 64-bit integers in `a` right by the amount specified by @@ -3112,7 +3112,7 @@ pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpsrlvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(simd_shr(a.as_u64x4(), count.as_u64x4())) } + unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) } } /// Load 256-bits of integer data from memory into dst using a non-temporal memory hint. 
mem_addr @@ -3687,16 +3687,36 @@ unsafe extern "C" { fn pslld(a: i32x8, count: i32x4) -> i32x8; #[link_name = "llvm.x86.avx2.psll.q"] fn psllq(a: i64x4, count: i64x2) -> i64x4; + #[link_name = "llvm.x86.avx2.psllv.d"] + fn psllvd(a: i32x4, count: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.psllv.d.256"] + fn psllvd256(a: i32x8, count: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx2.psllv.q"] + fn psllvq(a: i64x2, count: i64x2) -> i64x2; + #[link_name = "llvm.x86.avx2.psllv.q.256"] + fn psllvq256(a: i64x4, count: i64x4) -> i64x4; #[link_name = "llvm.x86.avx2.psra.w"] fn psraw(a: i16x16, count: i16x8) -> i16x16; #[link_name = "llvm.x86.avx2.psra.d"] fn psrad(a: i32x8, count: i32x4) -> i32x8; + #[link_name = "llvm.x86.avx2.psrav.d"] + fn psravd(a: i32x4, count: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.psrav.d.256"] + fn psravd256(a: i32x8, count: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.psrl.w"] fn psrlw(a: i16x16, count: i16x8) -> i16x16; #[link_name = "llvm.x86.avx2.psrl.d"] fn psrld(a: i32x8, count: i32x4) -> i32x8; #[link_name = "llvm.x86.avx2.psrl.q"] fn psrlq(a: i64x4, count: i64x2) -> i64x4; + #[link_name = "llvm.x86.avx2.psrlv.d"] + fn psrlvd(a: i32x4, count: i32x4) -> i32x4; + #[link_name = "llvm.x86.avx2.psrlv.d.256"] + fn psrlvd256(a: i32x8, count: i32x8) -> i32x8; + #[link_name = "llvm.x86.avx2.psrlv.q"] + fn psrlvq(a: i64x2, count: i64x2) -> i64x2; + #[link_name = "llvm.x86.avx2.psrlv.q.256"] + fn psrlvq256(a: i64x4, count: i64x4) -> i64x4; #[link_name = "llvm.x86.avx2.pshuf.b"] fn pshufb(a: u8x32, b: u8x32) -> u8x32; #[link_name = "llvm.x86.avx2.permd"] diff --git a/stdarch/crates/core_arch/src/x86/avx512bw.rs b/stdarch/crates/core_arch/src/x86/avx512bw.rs index 1771f196590c0..ad48cfd686c6d 100644 --- a/stdarch/crates/core_arch/src/x86/avx512bw.rs +++ b/stdarch/crates/core_arch/src/x86/avx512bw.rs @@ -6852,7 +6852,7 @@ pub fn _mm_maskz_slli_epi16(k: __mmask8, a: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllvw))] pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(simd_shl(a.as_u16x32(), count.as_u16x32())) } + unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6891,7 +6891,7 @@ pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllvw))] pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(simd_shl(a.as_u16x16(), count.as_u16x16())) } + unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
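The apparent reason for this revert is that the generic `simd_shl`/`simd_shr` lowerings have no defined result when a lane's shift count reaches the lane width, while the x86 variable-shift instructions zero such lanes. A hypothetical scalar model of one 16-bit lane, as I read the instruction semantics (the helper name is illustrative, not part of the patch):

```
// One 16-bit lane of `vpsllvw`: the instruction zeroes a lane whose shift
// count is >= 16; a generic `simd_shl` is not defined for such counts.
fn psllvw_lane(a: u16, count: u16) -> u16 {
    if count < 16 { a << count } else { 0 }
}

fn main() {
    assert_eq!(psllvw_lane(0b0000_0001, 3), 0b0000_1000);
    assert_eq!(psllvw_lane(0b0000_0001, 16), 0); // defined on x86, UB for `simd_shl`
}
```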
@@ -6930,7 +6930,7 @@ pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllvw))] pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(simd_shl(a.as_u16x8(), count.as_u16x8())) } + unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7188,7 +7188,7 @@ pub fn _mm_maskz_srli_epi16(k: __mmask8, a: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlvw))] pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(simd_shr(a.as_u16x32(), count.as_u16x32())) } + unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7227,7 +7227,7 @@ pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlvw))] pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(simd_shr(a.as_u16x16(), count.as_u16x16())) } + unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7266,7 +7266,7 @@ pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlvw))] pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(simd_shr(a.as_u16x8(), count.as_u16x8())) } + unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7511,7 +7511,7 @@ pub fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravw))] pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(simd_shr(a.as_i16x32(), count.as_i16x32())) } + unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -7550,7 +7550,7 @@ pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravw))] pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(simd_shr(a.as_i16x16(), count.as_i16x16())) } + unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7589,7 +7589,7 @@ pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravw))] pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(simd_shr(a.as_i16x8(), count.as_i16x8())) } + unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11645,12 +11645,33 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.psll.w.512"] fn vpsllw(a: i16x32, count: i16x8) -> i16x32; + #[link_name = "llvm.x86.avx512.psllv.w.512"] + fn vpsllvw(a: i16x32, b: i16x32) -> i16x32; + #[link_name = "llvm.x86.avx512.psllv.w.256"] + fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx512.psllv.w.128"] + fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.x86.avx512.psrl.w.512"] fn vpsrlw(a: i16x32, count: i16x8) -> i16x32; + #[link_name = "llvm.x86.avx512.psrlv.w.512"] + fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32; + #[link_name = "llvm.x86.avx512.psrlv.w.256"] + fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx512.psrlv.w.128"] + fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.x86.avx512.psra.w.512"] fn vpsraw(a: i16x32, count: i16x8) -> i16x32; + #[link_name = "llvm.x86.avx512.psrav.w.512"] + fn vpsravw(a: i16x32, count: i16x32) -> i16x32; + #[link_name = "llvm.x86.avx512.psrav.w.256"] + fn vpsravw256(a: i16x16, count: i16x16) -> i16x16; + #[link_name = "llvm.x86.avx512.psrav.w.128"] + fn vpsravw128(a: i16x8, count: i16x8) -> i16x8; + #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"] fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32; #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"] diff --git a/stdarch/crates/core_arch/src/x86/avx512f.rs b/stdarch/crates/core_arch/src/x86/avx512f.rs index 002534a65de52..ace1cad72387f 100644 --- a/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -20940,7 +20940,7 @@ pub fn _mm_maskz_srai_epi64(k: __mmask8, a: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravd))] pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(simd_shr(a.as_i32x16(), count.as_i32x16())) } + unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
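For the right shifts reverted above, the out-of-range behaviour differs between the logical and the arithmetic forms. A hypothetical scalar model of one 16-bit lane of each, under my reading of the instruction semantics (helper names are illustrative):

```
// Logical variable shift: out-of-range counts zero the lane.
fn psrlvw_lane(a: u16, count: u16) -> u16 {
    if count < 16 { a >> count } else { 0 }
}

// Arithmetic variable shift: out-of-range counts fill the lane with the sign bit.
fn psravw_lane(a: i16, count: u16) -> i16 {
    if count < 16 { a >> count } else { a >> 15 }
}

fn main() {
    assert_eq!(psrlvw_lane(0x8000, 100), 0);
    assert_eq!(psravw_lane(-1, 100), -1); // negative input: all ones
    assert_eq!(psravw_lane(0x4000, 100), 0); // positive input: all zeros
}
```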
@@ -21035,7 +21035,7 @@ pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravq))] pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(simd_shr(a.as_i64x8(), count.as_i64x8())) } + unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21074,7 +21074,7 @@ pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m51 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravq))] pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i { - unsafe { transmute(simd_shr(a.as_i64x4(), count.as_i64x4())) } + unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21113,7 +21113,7 @@ pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m25 #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsravq))] pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(simd_shr(a.as_i64x2(), count.as_i64x2())) } + unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21620,7 +21620,7 @@ pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllvd))] pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(simd_shl(a.as_u32x16(), count.as_u32x16())) } + unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) } } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21715,7 +21715,7 @@ pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlvd))] pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(simd_shr(a.as_u32x16(), count.as_u32x16())) } + unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
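The 64-bit arithmetic shifts reverted above follow the same pattern. A hypothetical scalar model of one `vpsravq` lane, where counts of 64 or more behave like a shift by 63 (the helper name is illustrative):

```
fn psravq_lane(a: i64, count: u64) -> i64 {
    if count < 64 { a >> count } else { a >> 63 }
}

fn main() {
    assert_eq!(psravq_lane(-8, 1), -4);
    assert_eq!(psravq_lane(-8, 200), -1); // sign fill rather than UB
    assert_eq!(psravq_lane(8, 200), 0);
}
```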
@@ -21810,7 +21810,7 @@ pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllvq))] pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(simd_shl(a.as_u64x8(), count.as_u64x8())) } + unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) } } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21905,7 +21905,7 @@ pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlvq))] pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { - unsafe { transmute(simd_shr(a.as_u64x8(), count.as_u64x8())) } + unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -42758,6 +42758,15 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"] fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8; + #[link_name = "llvm.x86.avx512.psllv.d.512"] + fn vpsllvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psrlv.d.512"] + fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psllv.q.512"] + fn vpsllvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.psrlv.q.512"] + fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.psll.d.512"] fn vpslld(a: i32x16, count: i32x4) -> i32x16; #[link_name = "llvm.x86.avx512.psrl.d.512"] @@ -42777,6 +42786,16 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.psra.q.128"] fn vpsraq128(a: i64x2, count: i64x2) -> i64x2; + #[link_name = "llvm.x86.avx512.psrav.d.512"] + fn vpsravd(a: i32x16, count: i32x16) -> i32x16; + + #[link_name = "llvm.x86.avx512.psrav.q.512"] + fn vpsravq(a: i64x8, count: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.psrav.q.256"] + fn vpsravq256(a: i64x4, count: i64x4) -> i64x4; + #[link_name = "llvm.x86.avx512.psrav.q.128"] + fn vpsravq128(a: i64x2, count: i64x2) -> i64x2; + #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"] fn vpermilps(a: f32x16, b: i32x16) -> f32x16; #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"] From 06781c45aef7af7b81d098def2e8050a2db816dd Mon Sep 17 00:00:00 2001 From: sayantn Date: Fri, 3 Oct 2025 03:51:34 +0530 Subject: [PATCH 038/358] Ensure `simd_funnel_sh{l,r}` always gets passed shift amounts in range --- stdarch/crates/core_arch/src/x86/avx512f.rs | 96 ++++++++++-- .../crates/core_arch/src/x86/avx512vbmi2.rs | 144 +++++++++++++++--- 2 files changed, 210 insertions(+), 30 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx512f.rs b/stdarch/crates/core_arch/src/x86/avx512f.rs index ace1cad72387f..155cf266c31af 100644 --- a/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -21152,7 +21152,13 @@ pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvd))] pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { - unsafe 
{ transmute(simd_funnel_shl(a.as_u32x16(), a.as_u32x16(), b.as_u32x16())) } + unsafe { + transmute(simd_funnel_shl( + a.as_u32x16(), + a.as_u32x16(), + simd_and(b.as_u32x16(), u32x16::splat(31)), + )) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21191,7 +21197,13 @@ pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvd))] pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(simd_funnel_shl(a.as_u32x8(), a.as_u32x8(), b.as_u32x8())) } + unsafe { + transmute(simd_funnel_shl( + a.as_u32x8(), + a.as_u32x8(), + simd_and(b.as_u32x8(), u32x8::splat(31)), + )) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21230,7 +21242,13 @@ pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvd))] pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(simd_funnel_shl(a.as_u32x4(), a.as_u32x4(), b.as_u32x4())) } + unsafe { + transmute(simd_funnel_shl( + a.as_u32x4(), + a.as_u32x4(), + simd_and(b.as_u32x4(), u32x4::splat(31)), + )) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21269,7 +21287,13 @@ pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvd))] pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(simd_funnel_shr(a.as_u32x16(), a.as_u32x16(), b.as_u32x16())) } + unsafe { + transmute(simd_funnel_shr( + a.as_u32x16(), + a.as_u32x16(), + simd_and(b.as_u32x16(), u32x16::splat(31)), + )) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21308,7 +21332,13 @@ pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvd))] pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(simd_funnel_shr(a.as_u32x8(), a.as_u32x8(), b.as_u32x8())) } + unsafe { + transmute(simd_funnel_shr( + a.as_u32x8(), + a.as_u32x8(), + simd_and(b.as_u32x8(), u32x8::splat(31)), + )) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -21347,7 +21377,13 @@ pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvd))] pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(simd_funnel_shr(a.as_u32x4(), a.as_u32x4(), b.as_u32x4())) } + unsafe { + transmute(simd_funnel_shr( + a.as_u32x4(), + a.as_u32x4(), + simd_and(b.as_u32x4(), u32x4::splat(31)), + )) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21386,7 +21422,13 @@ pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvq))] pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(simd_funnel_shl(a.as_u64x8(), a.as_u64x8(), b.as_u64x8())) } + unsafe { + transmute(simd_funnel_shl( + a.as_u64x8(), + a.as_u64x8(), + simd_and(b.as_u64x8(), u64x8::splat(63)), + )) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21425,7 +21467,13 @@ pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvq))] pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(simd_funnel_shl(a.as_u64x4(), a.as_u64x4(), b.as_u64x4())) } + unsafe { + transmute(simd_funnel_shl( + a.as_u64x4(), + a.as_u64x4(), + simd_and(b.as_u64x4(), u64x4::splat(63)), + )) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21464,7 +21512,13 @@ pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprolvq))] pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(simd_funnel_shl(a.as_u64x2(), a.as_u64x2(), b.as_u64x2())) } + unsafe { + transmute(simd_funnel_shl( + a.as_u64x2(), + a.as_u64x2(), + simd_and(b.as_u64x2(), u64x2::splat(63)), + )) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
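A sketch of one lane of the rotates as the patch now models them: a funnel shift of the lane with itself, with the count masked to the lane width first. x86 rotates take the count modulo 32 for 32-bit lanes (modulo 64 for 64-bit lanes), and the masking also keeps the count in the range `simd_funnel_shl`/`simd_funnel_shr` require. The helper name below is illustrative:

```
fn prolvd_lane(a: u32, b: u32) -> u32 {
    // `rotate_left` already wraps the count, but the explicit mask mirrors
    // the `simd_and(..., splat(31))` added in the patch.
    a.rotate_left(b & 31)
}

fn main() {
    assert_eq!(prolvd_lane(0x8000_0001, 1), 0x0000_0003);
    assert_eq!(prolvd_lane(0x1234_5678, 32), 0x1234_5678); // 32 & 31 == 0
    assert_eq!(prolvd_lane(0x1234_5678, 33), 0x2468_ACF0); // same as a count of 1
}
```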
@@ -21503,7 +21557,13 @@ pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvq))] pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(simd_funnel_shr(a.as_u64x8(), a.as_u64x8(), b.as_u64x8())) } + unsafe { + transmute(simd_funnel_shr( + a.as_u64x8(), + a.as_u64x8(), + simd_and(b.as_u64x8(), u64x8::splat(63)), + )) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21542,7 +21602,13 @@ pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvq))] pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(simd_funnel_shr(a.as_u64x4(), a.as_u64x4(), b.as_u64x4())) } + unsafe { + transmute(simd_funnel_shr( + a.as_u64x4(), + a.as_u64x4(), + simd_and(b.as_u64x4(), u64x4::splat(63)), + )) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21581,7 +21647,13 @@ pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vprorvq))] pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(simd_funnel_shr(a.as_u64x2(), a.as_u64x2(), b.as_u64x2())) } + unsafe { + transmute(simd_funnel_shr( + a.as_u64x2(), + a.as_u64x2(), + simd_and(b.as_u64x2(), u64x2::splat(63)), + )) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). diff --git a/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs b/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs index 09a90e29bf088..e25fd4528dc20 100644 --- a/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs +++ b/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs @@ -500,7 +500,13 @@ pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvq))] pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(simd_funnel_shl(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) } + unsafe { + transmute(simd_funnel_shl( + a.as_i64x8(), + b.as_i64x8(), + simd_and(c.as_i64x8(), i64x8::splat(63)), + )) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -539,7 +545,13 @@ pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvq))] pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(simd_funnel_shl(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) } + unsafe { + transmute(simd_funnel_shl( + a.as_i64x4(), + b.as_i64x4(), + simd_and(c.as_i64x4(), i64x4::splat(63)), + )) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -578,7 +590,13 @@ pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvq))] pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(simd_funnel_shl(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) } + unsafe { + transmute(simd_funnel_shl( + a.as_i64x2(), + b.as_i64x2(), + simd_and(c.as_i64x2(), i64x2::splat(63)), + )) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -617,7 +635,13 @@ pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvd))] pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(simd_funnel_shl(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) } + unsafe { + transmute(simd_funnel_shl( + a.as_i32x16(), + b.as_i32x16(), + simd_and(c.as_i32x16(), i32x16::splat(31)), + )) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -656,7 +680,13 @@ pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvd))] pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(simd_funnel_shl(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) } + unsafe { + transmute(simd_funnel_shl( + a.as_i32x8(), + b.as_i32x8(), + simd_and(c.as_i32x8(), i32x8::splat(31)), + )) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
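A scalar sketch of one lane of the qword funnel shift left (`vpshldvq`) shown above: concatenate `a` (high half) and `b` (low half) into a 128-bit value, shift left by `c` modulo 64, and keep the upper 64 bits. The `& 63` mirrors the `simd_and` masking the patch adds; the helper name is illustrative:

```
fn shldv_lane(a: u64, b: u64, c: u64) -> u64 {
    let wide = ((a as u128) << 64) | b as u128;
    ((wide << (c & 63)) >> 64) as u64
}

fn main() {
    assert_eq!(shldv_lane(0x1, 0x8000_0000_0000_0000, 1), 0x3);
    assert_eq!(shldv_lane(0xAB, 0, 64), 0xAB); // the count is taken mod 64
}
```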
@@ -695,7 +725,13 @@ pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvd))] pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(simd_funnel_shl(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) } + unsafe { + transmute(simd_funnel_shl( + a.as_i32x4(), + b.as_i32x4(), + simd_and(c.as_i32x4(), i32x4::splat(31)), + )) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -734,7 +770,13 @@ pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvw))] pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(simd_funnel_shl(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) } + unsafe { + transmute(simd_funnel_shl( + a.as_i16x32(), + b.as_i16x32(), + simd_and(c.as_i16x32(), i16x32::splat(15)), + )) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -773,7 +815,13 @@ pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvw))] pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(simd_funnel_shl(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) } + unsafe { + transmute(simd_funnel_shl( + a.as_i16x16(), + b.as_i16x16(), + simd_and(c.as_i16x16(), i16x16::splat(15)), + )) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -812,7 +860,13 @@ pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvw))] pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(simd_funnel_shl(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) } + unsafe { + transmute(simd_funnel_shl( + a.as_i16x8(), + b.as_i16x8(), + simd_and(c.as_i16x8(), i16x8::splat(15)), + )) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -851,7 +905,13 @@ pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvq))] pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(simd_funnel_shr(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) } + unsafe { + transmute(simd_funnel_shr( + b.as_i64x8(), + a.as_i64x8(), + simd_and(c.as_i64x8(), i64x8::splat(63)), + )) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -890,7 +950,13 @@ pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvq))] pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(simd_funnel_shr(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) } + unsafe { + transmute(simd_funnel_shr( + b.as_i64x4(), + a.as_i64x4(), + simd_and(c.as_i64x4(), i64x4::splat(63)), + )) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -929,7 +995,13 @@ pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvq))] pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(simd_funnel_shr(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) } + unsafe { + transmute(simd_funnel_shr( + b.as_i64x2(), + a.as_i64x2(), + simd_and(c.as_i64x2(), i64x2::splat(63)), + )) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -968,7 +1040,13 @@ pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvd))] pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(simd_funnel_shr(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) } + unsafe { + transmute(simd_funnel_shr( + b.as_i32x16(), + a.as_i32x16(), + simd_and(c.as_i32x16(), i32x16::splat(31)), + )) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -1007,7 +1085,13 @@ pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvd))] pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(simd_funnel_shr(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) } + unsafe { + transmute(simd_funnel_shr( + b.as_i32x8(), + a.as_i32x8(), + simd_and(c.as_i32x8(), i32x8::splat(31)), + )) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1046,7 +1130,13 @@ pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvd))] pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(simd_funnel_shr(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) } + unsafe { + transmute(simd_funnel_shr( + b.as_i32x4(), + a.as_i32x4(), + simd_and(c.as_i32x4(), i32x4::splat(31)), + )) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1085,7 +1175,13 @@ pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvw))] pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(simd_funnel_shr(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) } + unsafe { + transmute(simd_funnel_shr( + b.as_i16x32(), + a.as_i16x32(), + simd_and(c.as_i16x32(), i16x32::splat(15)), + )) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1124,7 +1220,13 @@ pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvw))] pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(simd_funnel_shr(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) } + unsafe { + transmute(simd_funnel_shr( + b.as_i16x16(), + a.as_i16x16(), + simd_and(c.as_i16x16(), i16x16::splat(15)), + )) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
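The `shrdv` family is the mirror image; note the swapped operand order in the `simd_funnel_shr` calls above. A scalar sketch of the qword case (`vpshrdvq`), with an illustrative helper name: concatenate `b` (high half) and `a` (low half), shift right by `c` modulo 64, keep the lower 64 bits.

```
fn shrdv_lane(a: u64, b: u64, c: u64) -> u64 {
    let wide = ((b as u128) << 64) | a as u128;
    (wide >> (c & 63)) as u64 // the cast keeps only the low 64 bits
}

fn main() {
    assert_eq!(shrdv_lane(0x2, 0x1, 1), 0x8000_0000_0000_0001);
    assert_eq!(shrdv_lane(0xAB, 0xFF, 64), 0xAB); // the count is taken mod 64
}
```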
@@ -1163,7 +1265,13 @@ pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvw))] pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(simd_funnel_shr(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) } + unsafe { + transmute(simd_funnel_shr( + b.as_i16x8(), + a.as_i16x8(), + simd_and(c.as_i16x8(), i16x8::splat(15)), + )) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). From ce9b082f0ffa679eb886eee303f6bc8679fdf16c Mon Sep 17 00:00:00 2001 From: sayantn Date: Fri, 3 Oct 2025 05:33:13 +0530 Subject: [PATCH 039/358] Use SIMD intrinsics for `vfmaddsubph` and `vfmsubaddph` --- .../crates/core_arch/src/x86/avx512fp16.rs | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/avx512fp16.rs b/stdarch/crates/core_arch/src/x86/avx512fp16.rs index a8cf1f246af1c..2f02b70fa86f9 100644 --- a/stdarch/crates/core_arch/src/x86/avx512fp16.rs +++ b/stdarch/crates/core_arch/src/x86/avx512fp16.rs @@ -7184,7 +7184,11 @@ pub fn _mm_maskz_fnmsub_round_sh( #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - unsafe { vfmaddsubph_128(a, b, c) } + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(sub, add, [0, 9, 2, 11, 4, 13, 6, 15]) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -7235,7 +7239,15 @@ pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { - unsafe { vfmaddsubph_256(a, b, c) } + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!( + sub, + add, + [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31] + ) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -7286,7 +7298,18 @@ pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { - _mm512_fmaddsub_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c) + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!( + sub, + add, + [ + 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, 20, 53, + 22, 55, 24, 57, 26, 59, 28, 61, 30, 63 + ] + ) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -7459,7 +7482,7 @@ pub fn _mm512_maskz_fmaddsub_round_ph( #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - unsafe { vfmaddsubph_128(a, b, simd_neg(c)) } + _mm_fmaddsub_ph(a, b, unsafe { simd_neg(c) }) } 
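The shuffles above interleave the two fused results: indices below the lane count select from `sub`, the rest from `add`, so even lanes compute `a*b - c` and odd lanes compute `a*b + c`, which is the `vfmaddsub` lane pattern. A scalar reference model (illustrative only; plain `f32` is used for readability where the intrinsics operate on `f16` lanes):

    fn fmaddsub_ref(a: &[f32], b: &[f32], c: &[f32]) -> Vec<f32> {
        a.iter()
            .zip(b)
            .zip(c)
            .enumerate()
            // even-indexed lanes subtract, odd-indexed lanes add
            .map(|(i, ((&a, &b), &c))| if i % 2 == 0 { a.mul_add(b, -c) } else { a.mul_add(b, c) })
            .collect()
    }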
/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7510,7 +7533,7 @@ pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { - unsafe { vfmaddsubph_256(a, b, simd_neg(c)) } + _mm256_fmaddsub_ph(a, b, unsafe { simd_neg(c) }) } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7561,7 +7584,7 @@ pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { - _mm512_fmsubadd_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c) + _mm512_fmaddsub_ph(a, b, unsafe { simd_neg(c) }) } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -16409,10 +16432,6 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"] fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16; - #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"] - fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h; - #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"] - fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h; #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"] fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h; From acdd1306cd90394b7e262ba291849533d98c6dc4 Mon Sep 17 00:00:00 2001 From: Jules Bertholet Date: Fri, 3 Oct 2025 00:00:39 -0400 Subject: [PATCH 040/358] Mitigate `thread_local!` shadowing issues Mitigates https://github.com/rust-lang/rust/issues/147006 and https://github.com/rust-lang/rust/issues/99018 --- std/src/sys/thread_local/native/mod.rs | 26 +++++++++++++------------- std/src/sys/thread_local/no_threads.rs | 16 ++++++++-------- std/src/sys/thread_local/os.rs | 8 ++++---- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/std/src/sys/thread_local/native/mod.rs b/std/src/sys/thread_local/native/mod.rs index 5dc142408047e..38b373be56c9d 100644 --- a/std/src/sys/thread_local/native/mod.rs +++ b/std/src/sys/thread_local/native/mod.rs @@ -55,7 +55,7 @@ pub macro thread_local_inner { // Used to generate the `LocalKey` value for const-initialized thread locals. 
(@key $t:ty, $(#[$align_attr:meta])*, const $init:expr) => {{ - const __INIT: $t = $init; + const __RUST_STD_INTERNAL_INIT: $t = $init; unsafe { $crate::thread::LocalKey::new(const { @@ -63,16 +63,16 @@ pub macro thread_local_inner { |_| { #[thread_local] $(#[$align_attr])* - static VAL: $crate::thread::local_impl::EagerStorage<$t> - = $crate::thread::local_impl::EagerStorage::new(__INIT); - VAL.get() + static __RUST_STD_INTERNAL_VAL: $crate::thread::local_impl::EagerStorage<$t> + = $crate::thread::local_impl::EagerStorage::new(__RUST_STD_INTERNAL_INIT); + __RUST_STD_INTERNAL_VAL.get() } } else { |_| { #[thread_local] $(#[$align_attr])* - static VAL: $t = __INIT; - &VAL + static __RUST_STD_INTERNAL_VAL: $t = __RUST_STD_INTERNAL_INIT; + &__RUST_STD_INTERNAL_VAL } } }) @@ -82,27 +82,27 @@ pub macro thread_local_inner { // used to generate the `LocalKey` value for `thread_local!` (@key $t:ty, $(#[$align_attr:meta])*, $init:expr) => {{ #[inline] - fn __init() -> $t { + fn __rust_std_internal_init_fn() -> $t { $init } unsafe { $crate::thread::LocalKey::new(const { if $crate::mem::needs_drop::<$t>() { - |init| { + |__rust_std_internal_init| { #[thread_local] $(#[$align_attr])* - static VAL: $crate::thread::local_impl::LazyStorage<$t, ()> + static __RUST_STD_INTERNAL_VAL: $crate::thread::local_impl::LazyStorage<$t, ()> = $crate::thread::local_impl::LazyStorage::new(); - VAL.get_or_init(init, __init) + __RUST_STD_INTERNAL_VAL.get_or_init(__rust_std_internal_init, __rust_std_internal_init_fn) } } else { - |init| { + |__rust_std_internal_init| { #[thread_local] $(#[$align_attr])* - static VAL: $crate::thread::local_impl::LazyStorage<$t, !> + static __RUST_STD_INTERNAL_VAL: $crate::thread::local_impl::LazyStorage<$t, !> = $crate::thread::local_impl::LazyStorage::new(); - VAL.get_or_init(init, __init) + __RUST_STD_INTERNAL_VAL.get_or_init(__rust_std_internal_init, __rust_std_internal_init_fn) } } }) diff --git a/std/src/sys/thread_local/no_threads.rs b/std/src/sys/thread_local/no_threads.rs index 409dfb19518d9..936d464be9f1c 100644 --- a/std/src/sys/thread_local/no_threads.rs +++ b/std/src/sys/thread_local/no_threads.rs @@ -13,15 +13,15 @@ use crate::ptr; pub macro thread_local_inner { // used to generate the `LocalKey` value for const-initialized thread locals (@key $t:ty, $(#[$align_attr:meta])*, const $init:expr) => {{ - const __INIT: $t = $init; + const __RUST_STD_INTERNAL_INIT: $t = $init; // NOTE: Please update the shadowing test in `tests/thread.rs` if these types are renamed. 
unsafe { $crate::thread::LocalKey::new(|_| { $(#[$align_attr])* - static VAL: $crate::thread::local_impl::EagerStorage<$t> = - $crate::thread::local_impl::EagerStorage { value: __INIT }; - &VAL.value + static __RUST_STD_INTERNAL_VAL: $crate::thread::local_impl::EagerStorage<$t> = + $crate::thread::local_impl::EagerStorage { value: __RUST_STD_INTERNAL_INIT }; + &__RUST_STD_INTERNAL_VAL.value }) } }}, @@ -29,13 +29,13 @@ pub macro thread_local_inner { // used to generate the `LocalKey` value for `thread_local!` (@key $t:ty, $(#[$align_attr:meta])*, $init:expr) => {{ #[inline] - fn __init() -> $t { $init } + fn __rust_std_internal_init_fn() -> $t { $init } unsafe { - $crate::thread::LocalKey::new(|init| { + $crate::thread::LocalKey::new(|__rust_std_internal_init| { $(#[$align_attr])* - static VAL: $crate::thread::local_impl::LazyStorage<$t> = $crate::thread::local_impl::LazyStorage::new(); - VAL.get(init, __init) + static __RUST_STD_INTERNAL_VAL: $crate::thread::local_impl::LazyStorage<$t> = $crate::thread::local_impl::LazyStorage::new(); + __RUST_STD_INTERNAL_VAL.get(__rust_std_internal_init, __rust_std_internal_init_fn) }) } }}, diff --git a/std/src/sys/thread_local/os.rs b/std/src/sys/thread_local/os.rs index 88bb5ae7c650d..9f7a29236e926 100644 --- a/std/src/sys/thread_local/os.rs +++ b/std/src/sys/thread_local/os.rs @@ -21,14 +21,14 @@ pub macro thread_local_inner { // used to generate the `LocalKey` value for `thread_local!`. (@key $t:ty, $($(#[$($align_attr:tt)*])+)?, $init:expr) => {{ #[inline] - fn __init() -> $t { $init } + fn __rust_std_internal_init_fn() -> $t { $init } // NOTE: this cannot import `LocalKey` or `Storage` with a `use` because that can shadow // user provided type or type alias with a matching name. Please update the shadowing test // in `tests/thread.rs` if these types are renamed. 
unsafe { - $crate::thread::LocalKey::new(|init| { - static VAL: $crate::thread::local_impl::Storage<$t, { + $crate::thread::LocalKey::new(|__rust_std_internal_init| { + static __RUST_STD_INTERNAL_VAL: $crate::thread::local_impl::Storage<$t, { $({ // Ensure that attributes have valid syntax // and that the proper feature gate is enabled @@ -43,7 +43,7 @@ pub macro thread_local_inner { final_align }> = $crate::thread::local_impl::Storage::new(); - VAL.get(init, __init) + __RUST_STD_INTERNAL_VAL.get(__rust_std_internal_init, __rust_std_internal_init_fn) }) } }}, From a874eeb98794f6635df865f4aa28e0639fe4c547 Mon Sep 17 00:00:00 2001 From: sayantn Date: Sat, 4 Oct 2025 13:04:29 +0530 Subject: [PATCH 041/358] Fix xsave segfaults --- .../x86_64-unknown-linux-gnu/Dockerfile | 2 - stdarch/crates/core_arch/src/x86/xsave.rs | 34 +++++++++------ stdarch/crates/core_arch/src/x86_64/xsave.rs | 41 +++++-------------- 3 files changed, 33 insertions(+), 44 deletions(-) diff --git a/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 99bfd056fb443..c9951a77ff6c8 100644 --- a/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -14,5 +14,3 @@ RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \ -rtm-mode full -tsx --" -# These tests fail with SDE as it doesn't support saving register data -ENV STDARCH_TEST_SKIP_FUNCTION="xsave,xsaveopt,xsave64,xsaveopt64" diff --git a/stdarch/crates/core_arch/src/x86/xsave.rs b/stdarch/crates/core_arch/src/x86/xsave.rs index 10266662e13ec..190cef929e470 100644 --- a/stdarch/crates/core_arch/src/x86/xsave.rs +++ b/stdarch/crates/core_arch/src/x86/xsave.rs @@ -159,29 +159,39 @@ pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) { xrstors(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } +#[cfg(test)] +pub(crate) use tests::XsaveArea; + #[cfg(test)] mod tests { - use std::{fmt, prelude::v1::*}; + use std::boxed::Box; use crate::core_arch::x86::*; use stdarch_test::simd_test; - #[repr(align(64))] #[derive(Debug)] - struct XsaveArea { - // max size for 256-bit registers is 800 bytes: - // see https://software.intel.com/en-us/node/682996 - // max size for 512-bit registers is 2560 bytes: - // FIXME: add source - data: [u8; 2560], + pub(crate) struct XsaveArea { + data: Box<[AlignedArray]>, } + #[repr(align(64))] + #[derive(Copy, Clone, Debug)] + struct AlignedArray([u8; 64]); + impl XsaveArea { - fn new() -> XsaveArea { - XsaveArea { data: [0; 2560] } + #[target_feature(enable = "xsave")] + pub(crate) fn new() -> XsaveArea { + // `CPUID.(EAX=0DH,ECX=0):ECX` contains the size required to hold all supported xsave + // components. `EBX` contains the size required to hold all xsave components currently + // enabled in `XCR0`. We are using `ECX` to ensure enough space in all scenarios + let CpuidResult { ecx, .. 
} = unsafe { __cpuid(0x0d) }; + + XsaveArea { + data: vec![AlignedArray([0; 64]); ecx.div_ceil(64) as usize].into_boxed_slice(), + } } - fn ptr(&mut self) -> *mut u8 { - self.data.as_mut_ptr() + pub(crate) fn ptr(&mut self) -> *mut u8 { + self.data.as_mut_ptr().cast() } } diff --git a/stdarch/crates/core_arch/src/x86_64/xsave.rs b/stdarch/crates/core_arch/src/x86_64/xsave.rs index ca2367307f8db..fa1454a822e31 100644 --- a/stdarch/crates/core_arch/src/x86_64/xsave.rs +++ b/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -126,29 +126,10 @@ pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) { #[cfg(test)] mod tests { - use crate::core_arch::x86_64::xsave; - use std::fmt; + use crate::core_arch::x86::*; + use crate::core_arch::x86_64::*; use stdarch_test::simd_test; - #[repr(align(64))] - #[derive(Debug)] - struct XsaveArea { - // max size for 256-bit registers is 800 bytes: - // see https://software.intel.com/en-us/node/682996 - // max size for 512-bit registers is 2560 bytes: - // FIXME: add source - data: [u8; 2560], - } - - impl XsaveArea { - fn new() -> XsaveArea { - XsaveArea { data: [0; 2560] } - } - fn ptr(&mut self) -> *mut u8 { - self.data.as_mut_ptr() - } - } - #[simd_test(enable = "xsave")] #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri unsafe fn test_xsave64() { @@ -156,9 +137,9 @@ mod tests { let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); - xsave::_xsave64(a.ptr(), m); - xsave::_xrstor64(a.ptr(), m); - xsave::_xsave64(b.ptr(), m); + _xsave64(a.ptr(), m); + _xrstor64(a.ptr(), m); + _xsave64(b.ptr(), m); } #[simd_test(enable = "xsave,xsaveopt")] @@ -168,9 +149,9 @@ mod tests { let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); - xsave::_xsaveopt64(a.ptr(), m); - xsave::_xrstor64(a.ptr(), m); - xsave::_xsaveopt64(b.ptr(), m); + _xsaveopt64(a.ptr(), m); + _xrstor64(a.ptr(), m); + _xsaveopt64(b.ptr(), m); } #[simd_test(enable = "xsave,xsavec")] @@ -180,8 +161,8 @@ mod tests { let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); - xsave::_xsavec64(a.ptr(), m); - xsave::_xrstor64(a.ptr(), m); - xsave::_xsavec64(b.ptr(), m); + _xsavec64(a.ptr(), m); + _xrstor64(a.ptr(), m); + _xsavec64(b.ptr(), m); } } From bc5f3b6fd51ecb59f8e4f256f581c073b0d9390e Mon Sep 17 00:00:00 2001 From: sayantn Date: Thu, 2 Oct 2025 00:41:58 +0530 Subject: [PATCH 042/358] Use Inline ASM for SSE4a nontemporal stores --- stdarch/crates/core_arch/src/x86/sse4a.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/sse4a.rs b/stdarch/crates/core_arch/src/x86/sse4a.rs index 051b77d02dfe0..b9692a2783a7a 100644 --- a/stdarch/crates/core_arch/src/x86/sse4a.rs +++ b/stdarch/crates/core_arch/src/x86/sse4a.rs @@ -15,10 +15,6 @@ unsafe extern "C" { fn insertq(x: i64x2, y: i64x2) -> i64x2; #[link_name = "llvm.x86.sse4a.insertqi"] fn insertqi(x: i64x2, y: i64x2, len: u8, idx: u8) -> i64x2; - #[link_name = "llvm.x86.sse4a.movnt.sd"] - fn movntsd(x: *mut f64, y: __m128d); - #[link_name = "llvm.x86.sse4a.movnt.ss"] - fn movntss(x: *mut f32, y: __m128); } /// Extracts the bit range specified by `y` from the lower 64 bits of `x`. 
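This change and the two that follow revolve around the weak ordering of non-temporal stores: they bypass the cache hierarchy and are not ordered by ordinary loads and stores, so the data must be published with `_mm_sfence` before another thread may read it. A minimal usage sketch of the pattern (assumed example, not taken from the diff; `dst` must be suitably aligned):

    #[cfg(target_arch = "x86_64")]
    unsafe fn nt_store_and_publish(dst: *mut f32, v: core::arch::x86_64::__m128) {
        use core::arch::x86_64::{_mm_sfence, _mm_stream_ps};
        _mm_stream_ps(dst, v); // weakly-ordered store, requires 16-byte alignment
        _mm_sfence(); // orders the NT store before any subsequent "ready" flag write
    }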
@@ -114,7 +110,12 @@ pub fn _mm_inserti_si64(x: __m128i, y: __m128i) #[cfg_attr(test, assert_instr(movntsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) { - movntsd(p, a); + crate::arch::asm!( + vps!("movntsd", ",{a}"), + p = in(reg) p, + a = in(xmm_reg) a, + options(nostack, preserves_flags), + ); } /// Non-temporal store of `a.0` into `p`. @@ -134,7 +135,12 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) { #[cfg_attr(test, assert_instr(movntss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) { - movntss(p, a); + crate::arch::asm!( + vps!("movntss", ",{a}"), + p = in(reg) p, + a = in(xmm_reg) a, + options(nostack, preserves_flags), + ); } #[cfg(test)] From 04b2178e6f4b665b297792ff80d88651419a4d1b Mon Sep 17 00:00:00 2001 From: sayantn Date: Thu, 2 Oct 2025 00:42:57 +0530 Subject: [PATCH 043/358] Add `_mm_sfence` to all non-temporal intrinsic tests --- stdarch/crates/core_arch/src/x86/avx.rs | 3 +++ stdarch/crates/core_arch/src/x86/avx512f.rs | 3 +++ stdarch/crates/core_arch/src/x86/sse.rs | 1 + stdarch/crates/core_arch/src/x86/sse2.rs | 4 ++++ stdarch/crates/core_arch/src/x86/sse4a.rs | 2 ++ stdarch/crates/core_arch/src/x86_64/sse2.rs | 1 + 6 files changed, 14 insertions(+) diff --git a/stdarch/crates/core_arch/src/x86/avx.rs b/stdarch/crates/core_arch/src/x86/avx.rs index c1bb897ce009d..d0821a4e3f37c 100644 --- a/stdarch/crates/core_arch/src/x86/avx.rs +++ b/stdarch/crates/core_arch/src/x86/avx.rs @@ -4291,6 +4291,7 @@ mod tests { let a = _mm256_setr_epi64x(1, 2, 3, 4); let mut r = _mm256_undefined_si256(); _mm256_stream_si256(ptr::addr_of_mut!(r), a); + _mm_sfence(); assert_eq_m256i(r, a); } @@ -4305,6 +4306,7 @@ mod tests { let mut mem = Memory { data: [-1.0; 4] }; _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a); + _mm_sfence(); for i in 0..4 { assert_eq!(mem.data[i], get_m256d(a, i)); } @@ -4321,6 +4323,7 @@ mod tests { let mut mem = Memory { data: [-1.0; 8] }; _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a); + _mm_sfence(); for i in 0..8 { assert_eq!(mem.data[i], get_m256(a, i)); } diff --git a/stdarch/crates/core_arch/src/x86/avx512f.rs b/stdarch/crates/core_arch/src/x86/avx512f.rs index 7f8f6b9cda378..743dabf798858 100644 --- a/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -56328,6 +56328,7 @@ mod tests { let mut mem = Memory { data: [-1.0; 16] }; _mm512_stream_ps(&mut mem.data[0] as *mut f32, a); + _mm_sfence(); for i in 0..16 { assert_eq!(mem.data[i], get_m512(a, i)); } @@ -56344,6 +56345,7 @@ mod tests { let mut mem = Memory { data: [-1.0; 8] }; _mm512_stream_pd(&mut mem.data[0] as *mut f64, a); + _mm_sfence(); for i in 0..8 { assert_eq!(mem.data[i], get_m512d(a, i)); } @@ -56360,6 +56362,7 @@ mod tests { let mut mem = Memory { data: [-1; 8] }; _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a); + _mm_sfence(); for i in 0..8 { assert_eq!(mem.data[i], get_m512i(a, i)); } diff --git a/stdarch/crates/core_arch/src/x86/sse.rs b/stdarch/crates/core_arch/src/x86/sse.rs index c5c6dc26b5b62..f47f9242ea77f 100644 --- a/stdarch/crates/core_arch/src/x86/sse.rs +++ b/stdarch/crates/core_arch/src/x86/sse.rs @@ -3329,6 +3329,7 @@ mod tests { let mut mem = Memory { data: [-1.0; 4] }; _mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a); + _mm_sfence(); for i in 0..4 { assert_eq!(mem.data[i], get_m128(a, i)); } diff --git a/stdarch/crates/core_arch/src/x86/sse2.rs b/stdarch/crates/core_arch/src/x86/sse2.rs index 
c9530a237a7d0..aad79f28ccfa1 100644 --- a/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/stdarch/crates/core_arch/src/x86/sse2.rs @@ -4070,6 +4070,7 @@ mod tests { ); let mut r = _mm_set1_epi8(0); _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8); + _mm_sfence(); let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m128i(r, e); } @@ -4106,6 +4107,7 @@ mod tests { let a = _mm_setr_epi32(1, 2, 3, 4); let mut r = _mm_undefined_si128(); _mm_stream_si128(ptr::addr_of_mut!(r), a); + _mm_sfence(); assert_eq_m128i(r, a); } @@ -4117,6 +4119,7 @@ mod tests { let a: i32 = 7; let mut mem = boxed::Box::::new(-1); _mm_stream_si32(ptr::addr_of_mut!(*mem), a); + _mm_sfence(); assert_eq!(a, *mem); } @@ -4813,6 +4816,7 @@ mod tests { let mut mem = Memory { data: [-1.0; 2] }; _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a); + _mm_sfence(); for i in 0..2 { assert_eq!(mem.data[i], get_m128d(a, i)); } diff --git a/stdarch/crates/core_arch/src/x86/sse4a.rs b/stdarch/crates/core_arch/src/x86/sse4a.rs index b9692a2783a7a..fc0af10f9d14e 100644 --- a/stdarch/crates/core_arch/src/x86/sse4a.rs +++ b/stdarch/crates/core_arch/src/x86/sse4a.rs @@ -215,6 +215,7 @@ mod tests { let x = _mm_setr_pd(3.0, 4.0); _mm_stream_sd(d, x); + _mm_sfence(); } assert_eq!(mem.data[0], 3.0); assert_eq!(mem.data[1], 2.0); @@ -240,6 +241,7 @@ mod tests { let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); _mm_stream_ss(d, x); + _mm_sfence(); } assert_eq!(mem.data[0], 5.0); assert_eq!(mem.data[1], 2.0); diff --git a/stdarch/crates/core_arch/src/x86_64/sse2.rs b/stdarch/crates/core_arch/src/x86_64/sse2.rs index 475e2d2a83cc3..464f9ca4e115e 100644 --- a/stdarch/crates/core_arch/src/x86_64/sse2.rs +++ b/stdarch/crates/core_arch/src/x86_64/sse2.rs @@ -200,6 +200,7 @@ mod tests { let a: i64 = 7; let mut mem = boxed::Box::::new(-1); _mm_stream_si64(ptr::addr_of_mut!(*mem), a); + _mm_sfence(); assert_eq!(a, *mem); } From 885db5f26a5d2326584114beee739791fbcf9963 Mon Sep 17 00:00:00 2001 From: sayantn Date: Sun, 5 Oct 2025 07:04:36 +0530 Subject: [PATCH 044/358] Add comments in NT asm blocks for future reference --- stdarch/crates/core_arch/src/x86/avx.rs | 3 +++ stdarch/crates/core_arch/src/x86/avx512f.rs | 3 +++ stdarch/crates/core_arch/src/x86/sse.rs | 1 + stdarch/crates/core_arch/src/x86/sse2.rs | 3 +++ stdarch/crates/core_arch/src/x86/sse4a.rs | 2 ++ stdarch/crates/core_arch/src/x86_64/sse2.rs | 1 + 6 files changed, 13 insertions(+) diff --git a/stdarch/crates/core_arch/src/x86/avx.rs b/stdarch/crates/core_arch/src/x86/avx.rs index d0821a4e3f37c..c2c2febf18291 100644 --- a/stdarch/crates/core_arch/src/x86/avx.rs +++ b/stdarch/crates/core_arch/src/x86/avx.rs @@ -1833,6 +1833,7 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vmovntdq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("vmovntdq", ",{a}"), p = in(reg) mem_addr, @@ -1861,6 +1862,7 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) { #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("vmovntpd", ",{a}"), p = in(reg) mem_addr, @@ -1890,6 +1892,7 @@ pub unsafe fn _mm256_stream_pd(mem_addr: 
*mut f64, a: __m256d) { #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("vmovntps", ",{a}"), p = in(reg) mem_addr, diff --git a/stdarch/crates/core_arch/src/x86/avx512f.rs b/stdarch/crates/core_arch/src/x86/avx512f.rs index 743dabf798858..b60df7dbc9a3e 100644 --- a/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -29593,6 +29593,7 @@ pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask #[cfg_attr(test, assert_instr(vmovntps))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("vmovntps", ",{a}"), p = in(reg) mem_addr, @@ -29619,6 +29620,7 @@ pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) { #[cfg_attr(test, assert_instr(vmovntpd))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("vmovntpd", ",{a}"), p = in(reg) mem_addr, @@ -29645,6 +29647,7 @@ pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) { #[cfg_attr(test, assert_instr(vmovntdq))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("vmovntdq", ",{a}"), p = in(reg) mem_addr, diff --git a/stdarch/crates/core_arch/src/x86/sse.rs b/stdarch/crates/core_arch/src/x86/sse.rs index f47f9242ea77f..be5ce8191a5cf 100644 --- a/stdarch/crates/core_arch/src/x86/sse.rs +++ b/stdarch/crates/core_arch/src/x86/sse.rs @@ -2022,6 +2022,7 @@ unsafe extern "C" { #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("movntps", ",{a}"), p = in(reg) mem_addr, diff --git a/stdarch/crates/core_arch/src/x86/sse2.rs b/stdarch/crates/core_arch/src/x86/sse2.rs index aad79f28ccfa1..2bdadd0b4b277 100644 --- a/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/stdarch/crates/core_arch/src/x86/sse2.rs @@ -1363,6 +1363,7 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { #[cfg_attr(test, assert_instr(movntdq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("movntdq", ",{a}"), p = in(reg) mem_addr, @@ -1390,6 +1391,7 @@ pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { #[cfg_attr(test, assert_instr(movnti))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("movnti", ",{a:e}"), // `:e` for 32bit value p = in(reg) mem_addr, @@ -2627,6 +2629,7 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { #[stable(feature = "simd_x86", since = "1.27.0")] 
#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("movntpd", ",{a}"), p = in(reg) mem_addr, diff --git a/stdarch/crates/core_arch/src/x86/sse4a.rs b/stdarch/crates/core_arch/src/x86/sse4a.rs index fc0af10f9d14e..7978d018e466c 100644 --- a/stdarch/crates/core_arch/src/x86/sse4a.rs +++ b/stdarch/crates/core_arch/src/x86/sse4a.rs @@ -110,6 +110,7 @@ pub fn _mm_inserti_si64(x: __m128i, y: __m128i) #[cfg_attr(test, assert_instr(movntsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("movntsd", ",{a}"), p = in(reg) p, @@ -135,6 +136,7 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) { #[cfg_attr(test, assert_instr(movntss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("movntss", ",{a}"), p = in(reg) p, diff --git a/stdarch/crates/core_arch/src/x86_64/sse2.rs b/stdarch/crates/core_arch/src/x86_64/sse2.rs index 464f9ca4e115e..0894aa9810de9 100644 --- a/stdarch/crates/core_arch/src/x86_64/sse2.rs +++ b/stdarch/crates/core_arch/src/x86_64/sse2.rs @@ -78,6 +78,7 @@ pub fn _mm_cvttsd_si64x(a: __m128d) -> i64 { #[cfg_attr(test, assert_instr(movnti))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) { + // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough crate::arch::asm!( vps!("movnti", ",{a}"), p = in(reg) mem_addr, From c1f2266aade49c90d753ebe3051637d2608e3157 Mon Sep 17 00:00:00 2001 From: cyrgani <85427285+cyrgani@users.noreply.github.com> Date: Sun, 5 Oct 2025 13:58:11 +0200 Subject: [PATCH 045/358] give a better example why `std` modules named like primitives are needed --- std/src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/std/src/lib.rs b/std/src/lib.rs index da41c1216c4d5..46eb120d65de3 100644 --- a/std/src/lib.rs +++ b/std/src/lib.rs @@ -63,10 +63,10 @@ //! type, but not the all-important methods. //! //! So for example there is a [page for the primitive type -//! `i32`](primitive::i32) that lists all the methods that can be called on -//! 32-bit integers (very useful), and there is a [page for the module -//! `std::i32`] that documents the constant values [`MIN`] and [`MAX`] (rarely -//! useful). +//! `char`](primitive::char) that lists all the methods that can be called on +//! characters (very useful), and there is a [page for the module +//! `std::char`] that documents iterator and error types created by these methods +//! (rarely useful). //! //! Note the documentation for the primitives [`str`] and [`[T]`][prim@slice] (also //! called 'slice'). 
Many method calls on [`String`] and [`Vec`] are actually From 6c9086200e31694cf826771f7f79c41ef8208708 Mon Sep 17 00:00:00 2001 From: The 8472 Date: Sun, 5 Oct 2025 00:28:23 +0200 Subject: [PATCH 046/358] only call polymorphic array iter drop machinery when the type requires it --- core/src/array/iter.rs | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/core/src/array/iter.rs b/core/src/array/iter.rs index fdae5c08f1e8e..1c1f4d78c03fd 100644 --- a/core/src/array/iter.rs +++ b/core/src/array/iter.rs @@ -2,9 +2,9 @@ use crate::intrinsics::transmute_unchecked; use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccessNoCoerce}; -use crate::mem::MaybeUninit; +use crate::mem::{ManuallyDrop, MaybeUninit}; use crate::num::NonZero; -use crate::ops::{IndexRange, Range, Try}; +use crate::ops::{Deref as _, DerefMut as _, IndexRange, Range, Try}; use crate::{fmt, ptr}; mod iter_inner; @@ -18,17 +18,17 @@ type InnerUnsized = iter_inner::PolymorphicIter<[MaybeUninit]>; #[rustc_diagnostic_item = "ArrayIntoIter"] #[derive(Clone)] pub struct IntoIter { - inner: InnerSized, + inner: ManuallyDrop>, } impl IntoIter { #[inline] fn unsize(&self) -> &InnerUnsized { - &self.inner + self.inner.deref() } #[inline] fn unsize_mut(&mut self) -> &mut InnerUnsized { - &mut self.inner + self.inner.deref_mut() } } @@ -69,7 +69,7 @@ impl IntoIterator for [T; N] { // SAFETY: The original array was entirely initialized and the the alive // range we're passing here represents that fact. let inner = unsafe { InnerSized::new_unchecked(IndexRange::zero_to(N), data) }; - IntoIter { inner } + IntoIter { inner: ManuallyDrop::new(inner) } } } @@ -146,7 +146,7 @@ impl IntoIter { let alive = unsafe { IndexRange::new_unchecked(initialized.start, initialized.end) }; // SAFETY: one of our safety condition is that these items are initialized. let inner = unsafe { InnerSized::new_unchecked(alive, buffer) }; - IntoIter { inner } + IntoIter { inner: ManuallyDrop::new(inner) } } /// Creates an iterator over `T` which returns no elements. @@ -205,7 +205,7 @@ impl IntoIter { #[inline] pub const fn empty() -> Self { let inner = InnerSized::empty(); - IntoIter { inner } + IntoIter { inner: ManuallyDrop::new(inner) } } /// Returns an immutable slice of all elements that have not been yielded @@ -320,11 +320,20 @@ impl DoubleEndedIterator for IntoIter { } #[stable(feature = "array_value_iter_impls", since = "1.40.0")] +// Even though all the Drop logic could be completely handled by +// PolymorphicIter, this impl still serves two purposes: +// - Drop has been part of the public API, so we can't remove it +// - the partial_drop function doesn't always get fully optimized away +// for !Drop types and ends up as dead code in the final binary. +// Branching on needs_drop higher in the call-tree allows it to be +// removed by earlier optimization passes. impl Drop for IntoIter { #[inline] fn drop(&mut self) { - // `inner` now handles this, but it'd technically be a breaking change - // to remove this `impl`, even though it's useless. + if crate::mem::needs_drop::() { + // SAFETY: This is the only place where we drop this field. 
+ unsafe { ManuallyDrop::drop(&mut self.inner) } + } } } From 92278fcc2fa4904b4a0dd6e79979b38c00dd0714 Mon Sep 17 00:00:00 2001 From: Tsukasa OI Date: Tue, 30 Sep 2025 00:16:44 +0000 Subject: [PATCH 047/358] RISC-V: Use symbolic instructions on inline assembly (part 1) While many intrinsics use `.insn` to generate raw machine code from numbers, all ratified instructions can be symbolic using `.option` directives. By saving the assembler environment with `.option push` then modifying the architecture with `.option arch`, we can temporarily enable certain extensions (as we use `.option pop` immediately after the target instruction, surrounding environment is completely intact in this commit; *almost* completely intact in general). This commit modifies the `pause` *hint* intrinsic to use symbolic *instruction* because we want to expose it even if the Zihintpause extension is unavailable on the target. --- stdarch/crates/core_arch/src/riscv_shared/mod.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/stdarch/crates/core_arch/src/riscv_shared/mod.rs b/stdarch/crates/core_arch/src/riscv_shared/mod.rs index 1bd147a64808c..f3933f58b1ca5 100644 --- a/stdarch/crates/core_arch/src/riscv_shared/mod.rs +++ b/stdarch/crates/core_arch/src/riscv_shared/mod.rs @@ -44,9 +44,14 @@ use crate::arch::asm; #[inline] #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] pub fn pause() { + // Use `.option` directives to expose this HINT instruction + // (no-op if not supported by the hardware) without `#[target_feature]`. unsafe { asm!( - ".insn i 0x0F, 0, x0, x0, 0x010", + ".option push", + ".option arch, +zihintpause", + "pause", + ".option pop", options(nomem, nostack, preserves_flags) ); } From 5f13604677fc4808152b451bb43c2a5835ad06c5 Mon Sep 17 00:00:00 2001 From: Nathaniel McCallum Date: Sat, 13 Sep 2025 08:51:04 -0400 Subject: [PATCH 048/358] constify basic Clone impls --- core/src/clone.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/core/src/clone.rs b/core/src/clone.rs index 7f2a40f753fa6..06d2c93cc698f 100644 --- a/core/src/clone.rs +++ b/core/src/clone.rs @@ -575,7 +575,8 @@ mod impls { ($($t:ty)*) => { $( #[stable(feature = "rust1", since = "1.0.0")] - impl Clone for $t { + #[rustc_const_unstable(feature = "const_clone", issue = "142757")] + impl const Clone for $t { #[inline(always)] fn clone(&self) -> Self { *self @@ -593,7 +594,8 @@ mod impls { } #[unstable(feature = "never_type", issue = "35121")] - impl Clone for ! { + #[rustc_const_unstable(feature = "const_clone", issue = "142757")] + impl const Clone for ! { #[inline] fn clone(&self) -> Self { *self @@ -601,7 +603,8 @@ mod impls { } #[stable(feature = "rust1", since = "1.0.0")] - impl Clone for *const T { + #[rustc_const_unstable(feature = "const_clone", issue = "142757")] + impl const Clone for *const T { #[inline(always)] fn clone(&self) -> Self { *self @@ -609,7 +612,8 @@ mod impls { } #[stable(feature = "rust1", since = "1.0.0")] - impl Clone for *mut T { + #[rustc_const_unstable(feature = "const_clone", issue = "142757")] + impl const Clone for *mut T { #[inline(always)] fn clone(&self) -> Self { *self @@ -618,7 +622,8 @@ mod impls { /// Shared references can be cloned, but mutable references *cannot*! 
#[stable(feature = "rust1", since = "1.0.0")] - impl Clone for &T { + #[rustc_const_unstable(feature = "const_clone", issue = "142757")] + impl const Clone for &T { #[inline(always)] #[rustc_diagnostic_item = "noop_method_clone"] fn clone(&self) -> Self { From 79612a1dcdb597d02f49c39760f540c82812f38d Mon Sep 17 00:00:00 2001 From: Samuel Tardieu Date: Mon, 6 Oct 2025 23:50:47 +0200 Subject: [PATCH 049/358] Add diagnostic items for `pub mod consts` of FP types They will be used in Clippy. --- core/src/num/f128.rs | 1 + core/src/num/f16.rs | 1 + core/src/num/f32.rs | 1 + core/src/num/f64.rs | 1 + 4 files changed, 4 insertions(+) diff --git a/core/src/num/f128.rs b/core/src/num/f128.rs index 4fe4735e304c9..e7101537b298f 100644 --- a/core/src/num/f128.rs +++ b/core/src/num/f128.rs @@ -18,6 +18,7 @@ use crate::{intrinsics, mem}; /// Basic mathematical constants. #[unstable(feature = "f128", issue = "116909")] +#[rustc_diagnostic_item = "f128_consts_mod"] pub mod consts { // FIXME: replace with mathematical constants from cmath. diff --git a/core/src/num/f16.rs b/core/src/num/f16.rs index 0bea6bc8801d8..aa8342a22ad58 100644 --- a/core/src/num/f16.rs +++ b/core/src/num/f16.rs @@ -20,6 +20,7 @@ use crate::{intrinsics, mem}; /// Basic mathematical constants. #[unstable(feature = "f16", issue = "116909")] +#[rustc_diagnostic_item = "f16_consts_mod"] pub mod consts { // FIXME: replace with mathematical constants from cmath. diff --git a/core/src/num/f32.rs b/core/src/num/f32.rs index e380cc698f574..3070e1dedbe43 100644 --- a/core/src/num/f32.rs +++ b/core/src/num/f32.rs @@ -277,6 +277,7 @@ pub const NEG_INFINITY: f32 = f32::NEG_INFINITY; /// Basic mathematical constants. #[stable(feature = "rust1", since = "1.0.0")] +#[rustc_diagnostic_item = "f32_consts_mod"] pub mod consts { // FIXME: replace with mathematical constants from cmath. diff --git a/core/src/num/f64.rs b/core/src/num/f64.rs index ff7449fd996ce..dc8ccc551b2da 100644 --- a/core/src/num/f64.rs +++ b/core/src/num/f64.rs @@ -277,6 +277,7 @@ pub const NEG_INFINITY: f64 = f64::NEG_INFINITY; /// Basic mathematical constants. #[stable(feature = "rust1", since = "1.0.0")] +#[rustc_diagnostic_item = "f64_consts_mod"] pub mod consts { // FIXME: replace with mathematical constants from cmath. 
From 6a3da86c2dca48bb2abf755967b1d21512104213 Mon Sep 17 00:00:00 2001 From: Marijn Schouten Date: Thu, 2 Oct 2025 07:12:08 +0000 Subject: [PATCH 050/358] iter repeat: panic on last --- core/src/iter/sources/repeat.rs | 3 ++- coretests/tests/iter/sources.rs | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/iter/sources/repeat.rs b/core/src/iter/sources/repeat.rs index 4bcd5b16aea6a..ac218e9b617a3 100644 --- a/core/src/iter/sources/repeat.rs +++ b/core/src/iter/sources/repeat.rs @@ -97,8 +97,9 @@ impl Iterator for Repeat { Some(self.element.clone()) } + #[track_caller] fn last(self) -> Option { - Some(self.element) + panic!("iterator is infinite"); } #[track_caller] diff --git a/coretests/tests/iter/sources.rs b/coretests/tests/iter/sources.rs index 5a391cb67751d..420f3088e6ee4 100644 --- a/coretests/tests/iter/sources.rs +++ b/coretests/tests/iter/sources.rs @@ -37,6 +37,7 @@ fn test_repeat_count() { } #[test] +#[should_panic = "iterator is infinite"] fn test_repeat_last() { assert_eq!(repeat(42).last(), Some(42)); } From 984cb16a253be92f9df3d80f291f083bff431b02 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Tue, 7 Oct 2025 13:07:23 -0700 Subject: [PATCH 051/358] library: fs: Factor out a `file_time_to_timespec` function in preparation for reusing it --- std/src/sys/fs/unix.rs | 49 ++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/std/src/sys/fs/unix.rs b/std/src/sys/fs/unix.rs index 33a1e7ff5e40e..bcd5ea6494447 100644 --- a/std/src/sys/fs/unix.rs +++ b/std/src/sys/fs/unix.rs @@ -1604,24 +1604,6 @@ impl File { } pub fn set_times(&self, times: FileTimes) -> io::Result<()> { - #[cfg(not(any( - target_os = "redox", - target_os = "espidf", - target_os = "horizon", - target_os = "nuttx", - )))] - let to_timespec = |time: Option| match time { - Some(time) if let Some(ts) = time.t.to_timespec() => Ok(ts), - Some(time) if time > crate::sys::time::UNIX_EPOCH => Err(io::const_error!( - io::ErrorKind::InvalidInput, - "timestamp is too large to set as a file time", - )), - Some(_) => Err(io::const_error!( - io::ErrorKind::InvalidInput, - "timestamp is too small to set as a file time", - )), - None => Ok(libc::timespec { tv_sec: 0, tv_nsec: libc::UTIME_OMIT as _ }), - }; cfg_select! { any(target_os = "redox", target_os = "espidf", target_os = "horizon", target_os = "nuttx") => { // Redox doesn't appear to support `UTIME_OMIT`. 
@@ -1639,17 +1621,17 @@ impl File { let mut attrlist: libc::attrlist = unsafe { mem::zeroed() }; attrlist.bitmapcount = libc::ATTR_BIT_MAP_COUNT; if times.created.is_some() { - buf[num_times].write(to_timespec(times.created)?); + buf[num_times].write(file_time_to_timespec(times.created)?); num_times += 1; attrlist.commonattr |= libc::ATTR_CMN_CRTIME; } if times.modified.is_some() { - buf[num_times].write(to_timespec(times.modified)?); + buf[num_times].write(file_time_to_timespec(times.modified)?); num_times += 1; attrlist.commonattr |= libc::ATTR_CMN_MODTIME; } if times.accessed.is_some() { - buf[num_times].write(to_timespec(times.accessed)?); + buf[num_times].write(file_time_to_timespec(times.accessed)?); num_times += 1; attrlist.commonattr |= libc::ATTR_CMN_ACCTIME; } @@ -1663,7 +1645,7 @@ impl File { Ok(()) } target_os = "android" => { - let times = [to_timespec(times.accessed)?, to_timespec(times.modified)?]; + let times = [file_time_to_timespec(times.accessed)?, file_time_to_timespec(times.modified)?]; // futimens requires Android API level 19 cvt(unsafe { weak!( @@ -1697,7 +1679,7 @@ impl File { return Ok(()); } } - let times = [to_timespec(times.accessed)?, to_timespec(times.modified)?]; + let times = [file_time_to_timespec(times.accessed)?, file_time_to_timespec(times.modified)?]; cvt(unsafe { libc::futimens(self.as_raw_fd(), times.as_ptr()) })?; Ok(()) } @@ -1705,6 +1687,27 @@ impl File { } } +#[cfg(not(any( + target_os = "redox", + target_os = "espidf", + target_os = "horizon", + target_os = "nuttx", +)))] +fn file_time_to_timespec(time: Option) -> io::Result { + match time { + Some(time) if let Some(ts) = time.t.to_timespec() => Ok(ts), + Some(time) if time > crate::sys::time::UNIX_EPOCH => Err(io::const_error!( + io::ErrorKind::InvalidInput, + "timestamp is too large to set as a file time", + )), + Some(_) => Err(io::const_error!( + io::ErrorKind::InvalidInput, + "timestamp is too small to set as a file time", + )), + None => Ok(libc::timespec { tv_sec: 0, tv_nsec: libc::UTIME_OMIT as _ }), + } +}; + impl DirBuilder { pub fn new() -> DirBuilder { DirBuilder { mode: 0o777 } From 583c6eb6b95042a5c1e9443ee3fbceeebde36939 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Tue, 7 Oct 2025 13:18:38 -0700 Subject: [PATCH 052/358] library: fs: Factor out the Apple file time to attrlist code for reuse --- std/src/sys/fs/unix.rs | 74 +++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/std/src/sys/fs/unix.rs b/std/src/sys/fs/unix.rs index bcd5ea6494447..bed9ea9139834 100644 --- a/std/src/sys/fs/unix.rs +++ b/std/src/sys/fs/unix.rs @@ -1616,30 +1616,12 @@ impl File { )) } target_vendor = "apple" => { - let mut buf = [mem::MaybeUninit::::uninit(); 3]; - let mut num_times = 0; - let mut attrlist: libc::attrlist = unsafe { mem::zeroed() }; - attrlist.bitmapcount = libc::ATTR_BIT_MAP_COUNT; - if times.created.is_some() { - buf[num_times].write(file_time_to_timespec(times.created)?); - num_times += 1; - attrlist.commonattr |= libc::ATTR_CMN_CRTIME; - } - if times.modified.is_some() { - buf[num_times].write(file_time_to_timespec(times.modified)?); - num_times += 1; - attrlist.commonattr |= libc::ATTR_CMN_MODTIME; - } - if times.accessed.is_some() { - buf[num_times].write(file_time_to_timespec(times.accessed)?); - num_times += 1; - attrlist.commonattr |= libc::ATTR_CMN_ACCTIME; - } + let ta = TimesAttrlist::from_times(×)?; cvt(unsafe { libc::fsetattrlist( self.as_raw_fd(), - (&raw const attrlist).cast::().cast_mut(), - 
buf.as_ptr().cast::<libc::c_void>().cast_mut(),
-                        num_times * size_of::<libc::timespec>(),
+                        ta.attrlist(),
+                        ta.times_buf(),
+                        ta.times_buf_size(),
                         0
                     ) })?;
                 Ok(())
             }
@@ -1706,7 +1688,54 @@ fn file_time_to_timespec(time: Option<SystemTime>) -> io::Result<libc::timespec>
         )),
         None => Ok(libc::timespec { tv_sec: 0, tv_nsec: libc::UTIME_OMIT as _ }),
     }
-};
+}
+
+#[cfg(target_vendor = "apple")]
+struct TimesAttrlist {
+    buf: [mem::MaybeUninit<libc::timespec>; 3],
+    attrlist: libc::attrlist,
+    num_times: usize,
+}
+
+#[cfg(target_vendor = "apple")]
+impl TimesAttrlist {
+    fn from_times(times: &FileTimes) -> io::Result<Self> {
+        let mut this = Self {
+            buf: [mem::MaybeUninit::<libc::timespec>::uninit(); 3],
+            attrlist: unsafe { mem::zeroed() },
+            num_times: 0,
+        };
+        this.attrlist.bitmapcount = libc::ATTR_BIT_MAP_COUNT;
+        if times.created.is_some() {
+            this.buf[this.num_times].write(file_time_to_timespec(times.created)?);
+            this.num_times += 1;
+            this.attrlist.commonattr |= libc::ATTR_CMN_CRTIME;
+        }
+        if times.modified.is_some() {
+            this.buf[this.num_times].write(file_time_to_timespec(times.modified)?);
+            this.num_times += 1;
+            this.attrlist.commonattr |= libc::ATTR_CMN_MODTIME;
+        }
+        if times.accessed.is_some() {
+            this.buf[this.num_times].write(file_time_to_timespec(times.accessed)?);
+            this.num_times += 1;
+            this.attrlist.commonattr |= libc::ATTR_CMN_ACCTIME;
+        }
+        Ok(this)
+    }
+
+    fn attrlist(&self) -> *mut libc::c_void {
+        (&raw const self.attrlist).cast::<libc::c_void>().cast_mut()
+    }
+
+    fn times_buf(&self) -> *mut libc::c_void {
+        self.buf.as_ptr().cast::<libc::c_void>().cast_mut()
+    }
+
+    fn times_buf_size(&self) -> usize {
+        self.num_times * size_of::<libc::timespec>()
+    }
+}

 impl DirBuilder {
     pub fn new() -> DirBuilder {
         DirBuilder { mode: 0o777 }

From 945b74a5c3ced46aa90b526bf356fbe1267ddbfc Mon Sep 17 00:00:00 2001
From: Julien Cretin
Date: Wed, 8 Oct 2025 11:29:20 +0200
Subject: [PATCH 053/358] Clarify how to remediate the panic_immediate_abort error

Users who build `core` for the sole purpose of enabling
`panic_immediate_abort` might expect "`panic_immediate_abort` is now a real
panic strategy" to mean that setting `panic = "immediate-abort"` in
`Cargo.toml` or `-Cpanic=immediate-abort` in `RUSTFLAGS` is sufficient for
migration. But this is not the case: `core` still needs to be rebuilt for
those changes to take effect.

See https://github.com/rust-lang/rust/issues/146974 for additional context.
---
 core/src/panicking.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/core/src/panicking.rs b/core/src/panicking.rs
index b5150837e6a94..448f4ffc3dae0 100644
--- a/core/src/panicking.rs
+++ b/core/src/panicking.rs
@@ -36,7 +36,8 @@ use crate::panic::{Location, PanicInfo};
 compile_error!(
     "panic_immediate_abort is now a real panic strategy! \
      Enable it with `panic = \"immediate-abort\"` in Cargo.toml, \
-     or with the compiler flags `-Zunstable-options -Cpanic=immediate-abort`"
+     or with the compiler flags `-Zunstable-options -Cpanic=immediate-abort`. \
+     In both cases, you still need to build core, e.g. with `-Zbuild-std`"
 );

 // First we define the two main entry points that all panics go through.
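For readers hitting the `compile_error!` above, the full remediation is the strategy flags plus a `-Zbuild-std` build of `core`; one plausible invocation (a sketch only — exact flags vary across nightlies, and the target triple is a placeholder):

    RUSTFLAGS="-Zunstable-options -Cpanic=immediate-abort" \
        cargo +nightly build -Zbuild-std --target <your-target>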
From da1fb227e673141aae73648076a454b4dc17abf7 Mon Sep 17 00:00:00 2001 From: sayantn Date: Tue, 3 Jun 2025 16:20:08 +0530 Subject: [PATCH 054/358] use simd intrinsics for `vec_max` and `vec_min` --- stdarch/crates/core_arch/src/s390x/vector.rs | 118 +++++++++++-------- 1 file changed, 70 insertions(+), 48 deletions(-) diff --git a/stdarch/crates/core_arch/src/s390x/vector.rs b/stdarch/crates/core_arch/src/s390x/vector.rs index f018344ead12d..7208105fb8721 100644 --- a/stdarch/crates/core_arch/src/s390x/vector.rs +++ b/stdarch/crates/core_arch/src/s390x/vector.rs @@ -60,26 +60,6 @@ struct PackedTuple { #[allow(improper_ctypes)] #[rustfmt::skip] unsafe extern "unadjusted" { - #[link_name = "llvm.smax.v16i8"] fn vmxb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; - #[link_name = "llvm.smax.v8i16"] fn vmxh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; - #[link_name = "llvm.smax.v4i32"] fn vmxf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; - #[link_name = "llvm.smax.v2i64"] fn vmxg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long; - - #[link_name = "llvm.umax.v16i8"] fn vmxlb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; - #[link_name = "llvm.umax.v8i16"] fn vmxlh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; - #[link_name = "llvm.umax.v4i32"] fn vmxlf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; - #[link_name = "llvm.umax.v2i64"] fn vmxlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long; - - #[link_name = "llvm.smin.v16i8"] fn vmnb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; - #[link_name = "llvm.smin.v8i16"] fn vmnh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; - #[link_name = "llvm.smin.v4i32"] fn vmnf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; - #[link_name = "llvm.smin.v2i64"] fn vmng(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long; - - #[link_name = "llvm.umin.v16i8"] fn vmnlb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; - #[link_name = "llvm.umin.v8i16"] fn vmnlh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; - #[link_name = "llvm.umin.v4i32"] fn vmnlf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; - #[link_name = "llvm.umin.v2i64"] fn vmnlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long; - #[link_name = "llvm.nearbyint.v4f32"] fn nearbyint_v4f32(a: vector_float) -> vector_float; #[link_name = "llvm.nearbyint.v2f64"] fn nearbyint_v2f64(a: vector_double) -> vector_double; @@ -683,17 +663,40 @@ mod sealed { unsafe fn vec_max(self, b: Other) -> Self::Result; } - test_impl! { vec_vmxsb (a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [vmxb, vmxb] } - test_impl! { vec_vmxsh (a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [vmxh, vmxh] } - test_impl! { vec_vmxsf (a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [vmxf, vmxf] } - test_impl! { vec_vmxsg (a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long [vmxg, vmxg] } + macro_rules! 
impl_max { + ($name:ident, $a:ty, $instr:ident) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $name(a: $a, b: $a) -> $a { + simd_select(simd_ge::<_, $a>(a, b), a, b) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorMax for $a { + type Result = Self; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_max(self, other: Self) -> Self { + $name(self, other) + } + } + }; + } - test_impl! { vec_vmxslb (a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [vmxlb, vmxlb] } - test_impl! { vec_vmxslh (a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [vmxlh, vmxlh] } - test_impl! { vec_vmxslf (a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [vmxlf, vmxlf] } - test_impl! { vec_vmxslg (a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long [vmxlg, vmxlg] } + mod impl_max { + use super::*; - impl_vec_trait! { [VectorMax vec_max] ~(vmxlb, vmxb, vmxlh, vmxh, vmxlf, vmxf, vmxlg, vmxg) } + impl_max!(vec_vmxsc, vector_signed_char, vmxb); + impl_max!(vec_vmxslc, vector_unsigned_char, vmxlb); + impl_max!(vec_vmxsh, vector_signed_short, vmxh); + impl_max!(vec_vmxslh, vector_unsigned_short, vmxlh); + impl_max!(vec_vmxsf, vector_signed_int, vmxf); + impl_max!(vec_vmxslf, vector_unsigned_int, vmxlf); + impl_max!(vec_vmxsg, vector_signed_long_long, vmxg); + impl_max!(vec_vmxslg, vector_unsigned_long_long, vmxlg); + } test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [simd_fmax, "vector-enhancements-1" vfmaxsb ] } test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [simd_fmax, "vector-enhancements-1" vfmaxdb] } @@ -707,17 +710,40 @@ mod sealed { unsafe fn vec_min(self, b: Other) -> Self::Result; } - test_impl! { vec_vmnsb (a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [vmnb, vmnb] } - test_impl! { vec_vmnsh (a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [vmnh, vmnh] } - test_impl! { vec_vmnsf (a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [vmnf, vmnf] } - test_impl! { vec_vmnsg (a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long [vmng, vmng] } + macro_rules! impl_min { + ($name:ident, $a:ty, $instr:ident) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + pub unsafe fn $name(a: $a, b: $a) -> $a { + simd_select(simd_le::<_, $a>(a, b), a, b) + } - test_impl! { vec_vmnslb (a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [vmnlb, vmnlb] } - test_impl! { vec_vmnslh (a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [vmnlh, vmnlh] } - test_impl! { vec_vmnslf (a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [vmnlf, vmnlf] } - test_impl! { vec_vmnslg (a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long [vmnlg, vmnlg] } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorMin for $a { + type Result = Self; - impl_vec_trait! 
{ [VectorMin vec_min] ~(vmxlb, vmxb, vmxlh, vmxh, vmxlf, vmxf, vmxlg, vmxg) } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_min(self, other: Self) -> Self { + $name(self, other) + } + } + }; + } + + mod impl_min { + use super::*; + + impl_min!(vec_vmnsc, vector_signed_char, vmnb); + impl_min!(vec_vmnslc, vector_unsigned_char, vmnlb); + impl_min!(vec_vmnsh, vector_signed_short, vmnh); + impl_min!(vec_vmnslh, vector_unsigned_short, vmnlh); + impl_min!(vec_vmnsf, vector_signed_int, vmnf); + impl_min!(vec_vmnslf, vector_unsigned_int, vmnlf); + impl_min!(vec_vmnsg, vector_signed_long_long, vmng); + impl_min!(vec_vmnslg, vector_unsigned_long_long, vmnlg); + } test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [simd_fmin, "vector-enhancements-1" vfminsb] } test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [simd_fmin, "vector-enhancements-1" vfmindb] } @@ -2368,17 +2394,13 @@ mod sealed { unsafe fn vec_packsu(self, b: Other) -> Self::Result; } - unsafe fn simd_smax(a: T, b: T) -> T { - simd_select::(simd_gt::(a, b), a, b) - } - #[inline] #[target_feature(enable = "vector")] #[cfg_attr(test, assert_instr(vpklsh))] unsafe fn vpacksuh(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char { vpklsh( - simd_smax(a, vector_signed_short([0; 8])), - simd_smax(b, vector_signed_short([0; 8])), + vec_max(a, vector_signed_short([0; 8])), + vec_max(b, vector_signed_short([0; 8])), ) } #[inline] @@ -2386,8 +2408,8 @@ mod sealed { #[cfg_attr(test, assert_instr(vpklsf))] unsafe fn vpacksuf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short { vpklsf( - simd_smax(a, vector_signed_int([0; 4])), - simd_smax(b, vector_signed_int([0; 4])), + vec_max(a, vector_signed_int([0; 4])), + vec_max(b, vector_signed_int([0; 4])), ) } #[inline] @@ -2398,8 +2420,8 @@ mod sealed { b: vector_signed_long_long, ) -> vector_unsigned_int { vpklsg( - simd_smax(a, vector_signed_long_long([0; 2])), - simd_smax(b, vector_signed_long_long([0; 2])), + vec_max(a, vector_signed_long_long([0; 2])), + vec_max(b, vector_signed_long_long([0; 2])), ) } From ca07370ebb33d5ff56c2ecc3d267aa69eb8183f3 Mon Sep 17 00:00:00 2001 From: "U. Lasiotus" Date: Tue, 7 Oct 2025 18:56:25 -0700 Subject: [PATCH 055/358] Add Motor OS std library port Motor OS was added as a no-std Tier-3 target in https://github.com/rust-lang/rust/pull/146848 as x86_64-unknown-motor. This patch/PR adds the std library for Motor OS. While the patch may seem large, all it does is proxy std pal calls to moto-rt. When there is some non-trivial code (e.g. thread::spawn), it is quite similar, and often identical, to what other platforms do. 
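As a rough illustration of that proxying shape, the sys layer mostly consists of one-line wrappers like the sketch below (hypothetical wrapper name; the equivalent grounded call appears in `os/fd/owned.rs` further down):

    // Forward to moto-rt, translating its error domain once at the boundary.
    pub fn duplicate_fd(fd: i32) -> crate::io::Result<i32> {
        moto_rt::fs::duplicate(fd).map_err(crate::sys::map_motor_error)
    }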
--- Cargo.lock | 11 + std/Cargo.toml | 3 + std/build.rs | 1 + std/src/os/fd/owned.rs | 22 +- std/src/os/fd/raw.rs | 10 +- std/src/os/mod.rs | 11 +- std/src/os/motor/ffi.rs | 37 ++ std/src/os/motor/mod.rs | 4 + std/src/os/motor/process.rs | 15 + std/src/sys/alloc/mod.rs | 3 + std/src/sys/alloc/motor.rs | 28 ++ std/src/sys/anonymous_pipe/mod.rs | 4 + std/src/sys/anonymous_pipe/motor.rs | 11 + std/src/sys/args/mod.rs | 5 + std/src/sys/args/motor.rs | 13 + std/src/sys/env/mod.rs | 5 + std/src/sys/env/motor.rs | 27 ++ std/src/sys/fd/mod.rs | 4 + std/src/sys/fd/motor.rs | 124 ++++++ std/src/sys/fs/mod.rs | 4 + std/src/sys/fs/motor.rs | 478 +++++++++++++++++++++++ std/src/sys/io/is_terminal/motor.rs | 6 + std/src/sys/io/mod.rs | 4 + std/src/sys/net/connection/mod.rs | 4 + std/src/sys/net/connection/motor.rs | 521 +++++++++++++++++++++++++ std/src/sys/pal/mod.rs | 4 + std/src/sys/pal/motor/mod.rs | 77 ++++ std/src/sys/pal/motor/os.rs | 100 +++++ std/src/sys/pal/motor/pipe.rs | 121 ++++++ std/src/sys/pal/motor/time.rs | 1 + std/src/sys/path/unix.rs | 2 +- std/src/sys/personality/mod.rs | 2 +- std/src/sys/process/mod.rs | 6 + std/src/sys/process/motor.rs | 313 +++++++++++++++ std/src/sys/random/mod.rs | 4 + std/src/sys/random/motor.rs | 3 + std/src/sys/stdio/mod.rs | 4 + std/src/sys/stdio/motor.rs | 232 +++++++++++ std/src/sys/sync/condvar/mod.rs | 1 + std/src/sys/sync/mutex/mod.rs | 1 + std/src/sys/sync/once/mod.rs | 1 + std/src/sys/sync/rwlock/mod.rs | 1 + std/src/sys/sync/thread_parking/mod.rs | 1 + std/src/sys/thread/mod.rs | 4 + std/src/sys/thread/motor.rs | 63 +++ std/src/sys/thread_local/mod.rs | 8 + 46 files changed, 2296 insertions(+), 8 deletions(-) create mode 100644 std/src/os/motor/ffi.rs create mode 100644 std/src/os/motor/mod.rs create mode 100644 std/src/os/motor/process.rs create mode 100644 std/src/sys/alloc/motor.rs create mode 100644 std/src/sys/anonymous_pipe/motor.rs create mode 100644 std/src/sys/args/motor.rs create mode 100644 std/src/sys/env/motor.rs create mode 100644 std/src/sys/fd/motor.rs create mode 100644 std/src/sys/fs/motor.rs create mode 100644 std/src/sys/io/is_terminal/motor.rs create mode 100644 std/src/sys/net/connection/motor.rs create mode 100644 std/src/sys/pal/motor/mod.rs create mode 100644 std/src/sys/pal/motor/os.rs create mode 100644 std/src/sys/pal/motor/pipe.rs create mode 100644 std/src/sys/pal/motor/time.rs create mode 100644 std/src/sys/process/motor.rs create mode 100644 std/src/sys/random/motor.rs create mode 100644 std/src/sys/stdio/motor.rs create mode 100644 std/src/sys/thread/motor.rs diff --git a/Cargo.lock b/Cargo.lock index 47fbf5169f491..1156d6925cf2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -166,6 +166,16 @@ dependencies = [ "rustc-std-workspace-core", ] +[[package]] +name = "moto-rt" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "058a2807a30527bee4c30df7ababe971cdde94372d4dbd1ff145bb403381436c" +dependencies = [ + "rustc-std-workspace-alloc", + "rustc-std-workspace-core", +] + [[package]] name = "object" version = "0.37.3" @@ -316,6 +326,7 @@ dependencies = [ "hermit-abi", "libc", "miniz_oxide", + "moto-rt", "object", "panic_abort", "panic_unwind", diff --git a/std/Cargo.toml b/std/Cargo.toml index 779b07ce240a6..b7bf5ea7ba738 100644 --- a/std/Cargo.toml +++ b/std/Cargo.toml @@ -70,6 +70,9 @@ fortanix-sgx-abi = { version = "0.6.1", features = [ 'rustc-dep-of-std', ], public = true } +[target.'cfg(target_os = "motor")'.dependencies] +moto-rt = { version = "0.15", features = 
['rustc-dep-of-std'], public = true }
+
 [target.'cfg(target_os = "hermit")'.dependencies]
 hermit-abi = { version = "0.5.0", features = [
   'rustc-dep-of-std',
diff --git a/std/build.rs b/std/build.rs
index 8a5a785060c85..bee28e88491d0 100644
--- a/std/build.rs
+++ b/std/build.rs
@@ -30,6 +30,7 @@ fn main() {
         || target_os == "windows"
         || target_os == "fuchsia"
         || (target_vendor == "fortanix" && target_env == "sgx")
+        || target_os == "motor"
         || target_os == "hermit"
         || target_os == "trusty"
         || target_os == "l4re"
diff --git a/std/src/os/fd/owned.rs b/std/src/os/fd/owned.rs
index 10e1e73a115bd..6a0e7a640028b 100644
--- a/std/src/os/fd/owned.rs
+++ b/std/src/os/fd/owned.rs
@@ -3,6 +3,9 @@
 #![stable(feature = "io_safety", since = "1.63.0")]
 #![deny(unsafe_op_in_unsafe_fn)]
 
+#[cfg(target_os = "motor")]
+use moto_rt::libc;
+
 use super::raw::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
 #[cfg(not(target_os = "trusty"))]
 use crate::fs;
@@ -12,7 +15,8 @@ use crate::mem::ManuallyDrop;
     target_arch = "wasm32",
     target_env = "sgx",
     target_os = "hermit",
-    target_os = "trusty"
+    target_os = "trusty",
+    target_os = "motor"
 )))]
 use crate::sys::cvt;
 #[cfg(not(target_os = "trusty"))]
@@ -95,7 +99,12 @@ impl OwnedFd {
 impl BorrowedFd<'_> {
     /// Creates a new `OwnedFd` instance that shares the same underlying file
     /// description as the existing `BorrowedFd` instance.
-    #[cfg(not(any(target_arch = "wasm32", target_os = "hermit", target_os = "trusty")))]
+    #[cfg(not(any(
+        target_arch = "wasm32",
+        target_os = "hermit",
+        target_os = "trusty",
+        target_os = "motor"
+    )))]
     #[stable(feature = "io_safety", since = "1.63.0")]
     pub fn try_clone_to_owned(&self) -> crate::io::Result<OwnedFd> {
         // We want to atomically duplicate this file descriptor and set the
@@ -123,6 +132,15 @@ impl BorrowedFd<'_> {
     pub fn try_clone_to_owned(&self) -> crate::io::Result<OwnedFd> {
         Err(crate::io::Error::UNSUPPORTED_PLATFORM)
     }
+
+    /// Creates a new `OwnedFd` instance that shares the same underlying file
+    /// description as the existing `BorrowedFd` instance.
+    #[cfg(target_os = "motor")]
+    #[stable(feature = "io_safety", since = "1.63.0")]
+    pub fn try_clone_to_owned(&self) -> crate::io::Result<OwnedFd> {
+        let fd = moto_rt::fs::duplicate(self.as_raw_fd()).map_err(crate::sys::map_motor_error)?;
+        Ok(unsafe { OwnedFd::from_raw_fd(fd) })
+    }
 }
 
 #[stable(feature = "io_safety", since = "1.63.0")]
diff --git a/std/src/os/fd/raw.rs b/std/src/os/fd/raw.rs
index 34a6cf1a8b84d..c01e6b83cd366 100644
--- a/std/src/os/fd/raw.rs
+++ b/std/src/os/fd/raw.rs
@@ -4,13 +4,17 @@
 
 #[cfg(target_os = "hermit")]
 use hermit_abi as libc;
+#[cfg(target_os = "motor")]
+use moto_rt::libc;
+#[cfg(target_os = "motor")]
+use super::owned::OwnedFd;
 #[cfg(not(target_os = "trusty"))]
 use crate::fs;
 use crate::io;
 #[cfg(target_os = "hermit")]
 use crate::os::hermit::io::OwnedFd;
-#[cfg(not(target_os = "hermit"))]
+#[cfg(all(not(target_os = "hermit"), not(target_os = "motor")))]
 use crate::os::raw;
 #[cfg(all(doc, not(target_arch = "wasm32")))]
 use crate::os::unix::io::AsFd;
@@ -23,10 +27,10 @@ use crate::sys_common::{AsInner, FromInner, IntoInner};
 
 /// Raw file descriptors.
 #[stable(feature = "rust1", since = "1.0.0")]
-#[cfg(not(target_os = "hermit"))]
+#[cfg(all(not(target_os = "hermit"), not(target_os = "motor")))]
 pub type RawFd = raw::c_int;
 #[stable(feature = "rust1", since = "1.0.0")]
-#[cfg(target_os = "hermit")]
+#[cfg(any(target_os = "hermit", target_os = "motor"))]
 pub type RawFd = i32;
 
 /// A trait to extract the raw file descriptor from an underlying object.
diff --git a/std/src/os/mod.rs b/std/src/os/mod.rs
index fd7a11433af1b..76374402be4b3 100644
--- a/std/src/os/mod.rs
+++ b/std/src/os/mod.rs
@@ -155,6 +155,8 @@ pub mod ios;
 pub mod l4re;
 #[cfg(target_os = "macos")]
 pub mod macos;
+#[cfg(target_os = "motor")]
+pub mod motor;
 #[cfg(target_os = "netbsd")]
 pub mod netbsd;
 #[cfg(target_os = "nto")]
@@ -182,7 +184,14 @@ pub mod vxworks;
 #[cfg(target_os = "xous")]
 pub mod xous;
 
-#[cfg(any(unix, target_os = "hermit", target_os = "trusty", target_os = "wasi", doc))]
+#[cfg(any(
+    unix,
+    target_os = "hermit",
+    target_os = "trusty",
+    target_os = "wasi",
+    target_os = "motor",
+    doc
+))]
 pub mod fd;
 
 #[cfg(any(target_os = "linux", target_os = "android", target_os = "cygwin", doc))]
diff --git a/std/src/os/motor/ffi.rs b/std/src/os/motor/ffi.rs
new file mode 100644
index 0000000000000..509fe641bb353
--- /dev/null
+++ b/std/src/os/motor/ffi.rs
@@ -0,0 +1,37 @@
+//! Motor OS-specific extensions to primitives in the [`std::ffi`] module.
+#![unstable(feature = "motor_ext", issue = "147456")]
+
+use crate::ffi::{OsStr, OsString};
+use crate::sealed::Sealed;
+
+/// Motor OS-specific extensions to [`OsString`].
+///
+/// This trait is sealed: it cannot be implemented outside the standard library.
+/// This is so that future additional methods are not breaking changes.
+pub trait OsStringExt: Sealed {
+    /// Motor OS strings are UTF-8, and thus just strings.
+    fn as_str(&self) -> &str;
+}
+
+impl OsStringExt for OsString {
+    #[inline]
+    fn as_str(&self) -> &str {
+        self.to_str().unwrap()
+    }
+}
+
+/// Motor OS-specific extensions to [`OsStr`].
+///
+/// This trait is sealed: it cannot be implemented outside the standard library.
+/// This is so that future additional methods are not breaking changes.
+pub trait OsStrExt: Sealed {
+    /// Motor OS strings are UTF-8, and thus just strings.
+    fn as_str(&self) -> &str;
+}
+
+impl OsStrExt for OsStr {
+    #[inline]
+    fn as_str(&self) -> &str {
+        self.to_str().unwrap()
+    }
+}
diff --git a/std/src/os/motor/mod.rs b/std/src/os/motor/mod.rs
new file mode 100644
index 0000000000000..18da079c74a15
--- /dev/null
+++ b/std/src/os/motor/mod.rs
@@ -0,0 +1,4 @@
+#![unstable(feature = "motor_ext", issue = "147456")]
+
+pub mod ffi;
+pub mod process;
diff --git a/std/src/os/motor/process.rs b/std/src/os/motor/process.rs
new file mode 100644
index 0000000000000..015fbcb97f97d
--- /dev/null
+++ b/std/src/os/motor/process.rs
@@ -0,0 +1,15 @@
+#![unstable(feature = "motor_ext", issue = "147456")]
+
+use crate::sealed::Sealed;
+use crate::sys_common::AsInner;
+
+pub trait ChildExt: Sealed {
+    /// Extracts the raw handle of the process's main thread, without taking ownership.
+    fn sys_handle(&self) -> u64;
+}
+
+impl ChildExt for crate::process::Child {
+    fn sys_handle(&self) -> u64 {
+        self.as_inner().handle()
+    }
+}
diff --git a/std/src/sys/alloc/mod.rs b/std/src/sys/alloc/mod.rs
index 2045b2fecc6ac..f2f1d1c7feceb 100644
--- a/std/src/sys/alloc/mod.rs
+++ b/std/src/sys/alloc/mod.rs
@@ -83,6 +83,9 @@ cfg_select!
{ target_os = "hermit" => { mod hermit; } + target_os = "motor" => { + mod motor; + } all(target_vendor = "fortanix", target_env = "sgx") => { mod sgx; } diff --git a/std/src/sys/alloc/motor.rs b/std/src/sys/alloc/motor.rs new file mode 100644 index 0000000000000..271e3c40c26ae --- /dev/null +++ b/std/src/sys/alloc/motor.rs @@ -0,0 +1,28 @@ +use crate::alloc::{GlobalAlloc, Layout, System}; + +#[stable(feature = "alloc_system_type", since = "1.28.0")] +unsafe impl GlobalAlloc for System { + #[inline] + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + // SAFETY: same requirements as in GlobalAlloc::alloc. + moto_rt::alloc::alloc(layout) + } + + #[inline] + unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { + // SAFETY: same requirements as in GlobalAlloc::alloc_zeroed. + moto_rt::alloc::alloc_zeroed(layout) + } + + #[inline] + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + // SAFETY: same requirements as in GlobalAlloc::dealloc. + unsafe { moto_rt::alloc::dealloc(ptr, layout) } + } + + #[inline] + unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + // SAFETY: same requirements as in GlobalAlloc::realloc. + unsafe { moto_rt::alloc::realloc(ptr, layout, new_size) } + } +} diff --git a/std/src/sys/anonymous_pipe/mod.rs b/std/src/sys/anonymous_pipe/mod.rs index b6f464161ee2b..64b2c014b54fe 100644 --- a/std/src/sys/anonymous_pipe/mod.rs +++ b/std/src/sys/anonymous_pipe/mod.rs @@ -9,6 +9,10 @@ cfg_select! { mod windows; pub use windows::{AnonPipe, pipe}; } + target_os = "motor" => { + mod motor; + pub use motor::{AnonPipe, pipe}; + } _ => { mod unsupported; pub use unsupported::{AnonPipe, pipe}; diff --git a/std/src/sys/anonymous_pipe/motor.rs b/std/src/sys/anonymous_pipe/motor.rs new file mode 100644 index 0000000000000..dfe10f7fafe49 --- /dev/null +++ b/std/src/sys/anonymous_pipe/motor.rs @@ -0,0 +1,11 @@ +use crate::io; +use crate::sys::fd::FileDesc; +use crate::sys::pipe::anon_pipe; +use crate::sys_common::IntoInner; + +pub type AnonPipe = FileDesc; + +#[inline] +pub fn pipe() -> io::Result<(AnonPipe, AnonPipe)> { + anon_pipe().map(|(rx, wx)| (rx.into_inner(), wx.into_inner())) +} diff --git a/std/src/sys/args/mod.rs b/std/src/sys/args/mod.rs index 75c59da721e19..5424d40a15883 100644 --- a/std/src/sys/args/mod.rs +++ b/std/src/sys/args/mod.rs @@ -6,6 +6,7 @@ all(target_family = "unix", not(any(target_os = "espidf", target_os = "vita"))), target_family = "windows", target_os = "hermit", + target_os = "motor", target_os = "uefi", target_os = "wasi", target_os = "xous", @@ -28,6 +29,10 @@ cfg_select! 
{
         mod sgx;
         pub use sgx::*;
     }
+    target_os = "motor" => {
+        mod motor;
+        pub use motor::*;
+    }
     target_os = "uefi" => {
         mod uefi;
         pub use uefi::*;
diff --git a/std/src/sys/args/motor.rs b/std/src/sys/args/motor.rs
new file mode 100644
index 0000000000000..c3dbe87cec411
--- /dev/null
+++ b/std/src/sys/args/motor.rs
@@ -0,0 +1,13 @@
+pub use super::common::Args;
+use crate::ffi::OsString;
+
+pub fn args() -> Args {
+    let motor_args: Vec<String> = moto_rt::process::args();
+    let mut rust_args = Vec::new();
+
+    for arg in motor_args {
+        rust_args.push(OsString::from(arg));
+    }
+
+    Args::new(rust_args)
+}
diff --git a/std/src/sys/env/mod.rs b/std/src/sys/env/mod.rs
index f211a9fc86b3b..89856516b6dce 100644
--- a/std/src/sys/env/mod.rs
+++ b/std/src/sys/env/mod.rs
@@ -5,6 +5,7 @@
 #[cfg(any(
     target_family = "unix",
     target_os = "hermit",
+    target_os = "motor",
     all(target_vendor = "fortanix", target_env = "sgx"),
     target_os = "solid_asp3",
     target_os = "uefi",
@@ -26,6 +27,10 @@ cfg_select! {
         mod hermit;
         pub use hermit::*;
     }
+    target_os = "motor" => {
+        mod motor;
+        pub use motor::*;
+    }
     all(target_vendor = "fortanix", target_env = "sgx") => {
         mod sgx;
         pub use sgx::*;
diff --git a/std/src/sys/env/motor.rs b/std/src/sys/env/motor.rs
new file mode 100644
index 0000000000000..1f756ccd3ee85
--- /dev/null
+++ b/std/src/sys/env/motor.rs
@@ -0,0 +1,27 @@
+pub use super::common::Env;
+use crate::ffi::{OsStr, OsString};
+use crate::io;
+use crate::os::motor::ffi::OsStrExt;
+
+pub fn env() -> Env {
+    let motor_env: Vec<(String, String)> = moto_rt::process::env();
+    let mut rust_env = vec![];
+
+    for (k, v) in motor_env {
+        rust_env.push((OsString::from(k), OsString::from(v)));
+    }
+
+    Env::new(rust_env)
+}
+
+pub fn getenv(key: &OsStr) -> Option<OsString> {
+    moto_rt::process::getenv(key.as_str()).map(|s| OsString::from(s))
+}
+
+pub unsafe fn setenv(key: &OsStr, val: &OsStr) -> io::Result<()> {
+    Ok(moto_rt::process::setenv(key.as_str(), val.as_str()))
+}
+
+pub unsafe fn unsetenv(key: &OsStr) -> io::Result<()> {
+    Ok(moto_rt::process::unsetenv(key.as_str()))
+}
diff --git a/std/src/sys/fd/mod.rs b/std/src/sys/fd/mod.rs
index 7cb9dd1cba9d3..330499ecc18f6 100644
--- a/std/src/sys/fd/mod.rs
+++ b/std/src/sys/fd/mod.rs
@@ -11,6 +11,10 @@ cfg_select! {
         mod hermit;
         pub use hermit::*;
     }
+    target_os = "motor" => {
+        mod motor;
+        pub use motor::*;
+    }
     all(target_vendor = "fortanix", target_env = "sgx") => {
         mod sgx;
         pub use sgx::*;
diff --git a/std/src/sys/fd/motor.rs b/std/src/sys/fd/motor.rs
new file mode 100644
index 0000000000000..4211fef8007ab
--- /dev/null
+++ b/std/src/sys/fd/motor.rs
@@ -0,0 +1,124 @@
+#![unstable(reason = "not public", issue = "none", feature = "fd")]
+
+use crate::io::{self, BorrowedCursor, IoSlice, IoSliceMut, Read};
+use crate::os::fd::{AsFd, AsRawFd, BorrowedFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
+use crate::sys::map_motor_error;
+use crate::sys_common::{AsInner, FromInner, IntoInner};
+
+#[derive(Debug)]
+pub struct FileDesc(OwnedFd);
+
+impl FileDesc {
+    pub fn read(&self, buf: &mut [u8]) -> io::Result<usize> {
+        moto_rt::fs::read(self.as_raw_fd(), buf).map_err(map_motor_error)
+    }
+
+    pub fn read_buf(&self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
+        crate::io::default_read_buf(|buf| self.read(buf), cursor)
+    }
+
+    pub fn read_vectored(&self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
+        io::default_read_vectored(|b| self.read(b), bufs)
+    }
+
+    pub fn read_to_end(&self, buf: &mut Vec<u8>) -> io::Result<usize> {
+        let mut me = self;
+        (&mut me).read_to_end(buf)
+    }
+
+    pub fn write(&self, buf: &[u8]) -> io::Result<usize> {
+        moto_rt::fs::write(self.as_raw_fd(), buf).map_err(map_motor_error)
+    }
+
+    pub fn write_vectored(&self, bufs: &[IoSlice<'_>]) -> io::Result<usize> {
+        crate::io::default_write_vectored(|b| self.write(b), bufs)
+    }
+
+    pub fn is_write_vectored(&self) -> bool {
+        false
+    }
+
+    #[inline]
+    pub fn is_read_vectored(&self) -> bool {
+        false
+    }
+
+    pub fn set_nonblocking(&self, nonblocking: bool) -> io::Result<()> {
+        moto_rt::net::set_nonblocking(self.as_raw_fd(), nonblocking).map_err(map_motor_error)
+    }
+
+    #[inline]
+    pub fn duplicate(&self) -> io::Result<Self> {
+        let fd = moto_rt::fs::duplicate(self.as_raw_fd()).map_err(map_motor_error)?;
+        // SAFETY: safe because we just got it from the OS runtime.
+        unsafe { Ok(Self::from_raw_fd(fd)) }
+    }
+
+    #[inline]
+    pub fn try_clone(&self) -> io::Result<Self> {
+        self.duplicate()
+    }
+}
+
+impl<'a> Read for &'a FileDesc {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        (**self).read(buf)
+    }
+
+    fn read_buf(&mut self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
+        (**self).read_buf(cursor)
+    }
+
+    fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
+        (**self).read_vectored(bufs)
+    }
+
+    #[inline]
+    fn is_read_vectored(&self) -> bool {
+        (**self).is_read_vectored()
+    }
+}
+
+impl AsInner<OwnedFd> for FileDesc {
+    #[inline]
+    fn as_inner(&self) -> &OwnedFd {
+        &self.0
+    }
+}
+
+impl IntoInner<OwnedFd> for FileDesc {
+    fn into_inner(self) -> OwnedFd {
+        self.0
+    }
+}
+
+impl FromInner<OwnedFd> for FileDesc {
+    fn from_inner(owned_fd: OwnedFd) -> Self {
+        Self(owned_fd)
+    }
+}
+
+impl AsFd for FileDesc {
+    fn as_fd(&self) -> BorrowedFd<'_> {
+        self.0.as_fd()
+    }
+}
+
+impl AsRawFd for FileDesc {
+    #[inline]
+    fn as_raw_fd(&self) -> RawFd {
+        self.0.as_raw_fd()
+    }
+}
+
+impl IntoRawFd for FileDesc {
+    fn into_raw_fd(self) -> RawFd {
+        self.0.into_raw_fd()
+    }
+}
+
+impl FromRawFd for FileDesc {
+    unsafe fn from_raw_fd(raw_fd: RawFd) -> Self {
+        unsafe { Self(FromRawFd::from_raw_fd(raw_fd)) }
+    }
+}
diff --git a/std/src/sys/fs/mod.rs b/std/src/sys/fs/mod.rs
index 64f5a6b36d3db..403bfb2d9b929 100644
--- a/std/src/sys/fs/mod.rs
+++ b/std/src/sys/fs/mod.rs
@@ -27,6 +27,10 @@ cfg_select! {
         mod hermit;
         use hermit as imp;
     }
+    target_os = "motor" => {
+        mod motor;
+        use motor as imp;
+    }
     target_os = "solid_asp3" => {
         mod solid;
         use solid as imp;
diff --git a/std/src/sys/fs/motor.rs b/std/src/sys/fs/motor.rs
new file mode 100644
index 0000000000000..656b6e81b9518
--- /dev/null
+++ b/std/src/sys/fs/motor.rs
@@ -0,0 +1,478 @@
+use crate::ffi::OsString;
+use crate::hash::Hash;
+use crate::io::{self, BorrowedCursor, IoSlice, IoSliceMut, SeekFrom};
+use crate::os::fd::{AsFd, AsRawFd, BorrowedFd, FromRawFd, IntoRawFd, RawFd};
+use crate::path::{Path, PathBuf};
+use crate::sys::fd::FileDesc;
+pub use crate::sys::fs::common::exists;
+use crate::sys::time::SystemTime;
+use crate::sys::{map_motor_error, unsupported};
+use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub struct FileType {
+    rt_filetype: u8,
+}
+
+impl FileType {
+    pub fn is_dir(&self) -> bool {
+        self.rt_filetype == moto_rt::fs::FILETYPE_DIRECTORY
+    }
+
+    pub fn is_file(&self) -> bool {
+        self.rt_filetype == moto_rt::fs::FILETYPE_FILE
+    }
+
+    pub fn is_symlink(&self) -> bool {
+        false
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct FilePermissions {
+    rt_perm: u64,
+}
+
+impl FilePermissions {
+    pub fn readonly(&self) -> bool {
+        (self.rt_perm & moto_rt::fs::PERM_WRITE == 0)
+            && (self.rt_perm & moto_rt::fs::PERM_READ != 0)
+    }
+
+    pub fn set_readonly(&mut self, readonly: bool) {
+        if readonly {
+            self.rt_perm = moto_rt::fs::PERM_READ;
+        } else {
+            self.rt_perm = moto_rt::fs::PERM_READ | moto_rt::fs::PERM_WRITE;
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug, Default)]
+pub struct FileTimes {
+    modified: u128,
+    accessed: u128,
+}
+
+impl FileTimes {
+    pub fn set_accessed(&mut self, t: SystemTime) {
+        self.accessed = t.as_u128();
+    }
+
+    pub fn set_modified(&mut self, t: SystemTime) {
+        self.modified = t.as_u128();
+    }
+}
+
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct FileAttr {
+    inner: moto_rt::fs::FileAttr,
+}
+
+impl FileAttr {
+    pub fn size(&self) -> u64 {
+        self.inner.size
+    }
+
+    pub fn perm(&self) -> FilePermissions {
+        FilePermissions { rt_perm: self.inner.perm }
+    }
+
+    pub fn file_type(&self) -> FileType {
+        FileType { rt_filetype: self.inner.file_type }
+    }
+
+    pub fn modified(&self) -> io::Result<SystemTime> {
+        match self.inner.modified {
+            0 => Err(crate::io::Error::from(crate::io::ErrorKind::Other)),
+            x => Ok(SystemTime::from_u128(x)),
+        }
+    }
+
+    pub fn accessed(&self) -> io::Result<SystemTime> {
+        match self.inner.accessed {
+            0 => Err(crate::io::Error::from(crate::io::ErrorKind::Other)),
+            x => Ok(SystemTime::from_u128(x)),
+        }
+    }
+
+    pub fn created(&self) -> io::Result<SystemTime> {
+        match self.inner.created {
+            0 => Err(crate::io::Error::from(crate::io::ErrorKind::Other)),
+            x => Ok(SystemTime::from_u128(x)),
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct OpenOptions {
+    rt_open_options: u32,
+}
+
+impl OpenOptions {
+    pub fn new() -> OpenOptions {
+        OpenOptions { rt_open_options: 0 }
+    }
+
+    pub fn read(&mut self, read: bool) {
+        if read {
+            self.rt_open_options |= moto_rt::fs::O_READ;
+        } else {
+            self.rt_open_options &= !moto_rt::fs::O_READ;
+        }
+    }
+
+    pub fn write(&mut self, write: bool) {
+        if write {
+            self.rt_open_options |= moto_rt::fs::O_WRITE;
+        } else {
+            self.rt_open_options &= !moto_rt::fs::O_WRITE;
+        }
+    }
+
+    pub fn append(&mut self, append: bool) {
+        if append {
+            self.rt_open_options |= moto_rt::fs::O_APPEND;
+        } else {
+            self.rt_open_options &= !moto_rt::fs::O_APPEND;
+        }
+    }
+
+    pub fn truncate(&mut self, truncate: bool) {
+        if truncate {
+            self.rt_open_options |= moto_rt::fs::O_TRUNCATE;
+        } else {
+            self.rt_open_options &= !moto_rt::fs::O_TRUNCATE;
+        }
+    }
+
+    pub fn create(&mut self, create: bool) {
+        if create {
+            self.rt_open_options |= moto_rt::fs::O_CREATE;
+        } else {
+            self.rt_open_options &= !moto_rt::fs::O_CREATE;
+        }
+    }
+
+    pub fn create_new(&mut self, create_new: bool) {
+        if create_new {
+            self.rt_open_options |= moto_rt::fs::O_CREATE_NEW;
+        } else {
+            self.rt_open_options &= !moto_rt::fs::O_CREATE_NEW;
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct File(FileDesc);
+
+impl File {
+    pub fn open(path: &Path, opts: &OpenOptions) -> io::Result<File> {
+        let path = path.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+        moto_rt::fs::open(path, opts.rt_open_options)
+            .map(|fd| unsafe { Self::from_raw_fd(fd) })
+            .map_err(map_motor_error)
+    }
+
+    pub fn file_attr(&self) -> io::Result<FileAttr> {
+        moto_rt::fs::get_file_attr(self.as_raw_fd())
+            .map(|inner| -> FileAttr { FileAttr { inner } })
+            .map_err(map_motor_error)
+    }
+
+    pub fn fsync(&self) -> io::Result<()> {
+        moto_rt::fs::fsync(self.as_raw_fd()).map_err(map_motor_error)
+    }
+
+    pub fn datasync(&self) -> io::Result<()> {
+        moto_rt::fs::datasync(self.as_raw_fd()).map_err(map_motor_error)
+    }
+
+    pub fn truncate(&self, size: u64) -> io::Result<()> {
+        moto_rt::fs::truncate(self.as_raw_fd(), size).map_err(map_motor_error)
+    }
+
+    pub fn read(&self, buf: &mut [u8]) -> io::Result<usize> {
+        moto_rt::fs::read(self.as_raw_fd(), buf).map_err(map_motor_error)
+    }
+
+    pub fn read_vectored(&self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
+        crate::io::default_read_vectored(|b| self.read(b), bufs)
+    }
+
+    pub fn is_read_vectored(&self) -> bool {
+        false
+    }
+
+    pub fn read_buf(&self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
+        crate::io::default_read_buf(|buf| self.read(buf), cursor)
+    }
+
+    pub fn write(&self, buf: &[u8]) -> io::Result<usize> {
+        moto_rt::fs::write(self.as_raw_fd(), buf).map_err(map_motor_error)
+    }
+
+    pub fn write_vectored(&self, bufs: &[IoSlice<'_>]) -> io::Result<usize> {
+        crate::io::default_write_vectored(|b| self.write(b), bufs)
+    }
+
+    pub fn is_write_vectored(&self) -> bool {
+        false
+    }
+
+    pub fn flush(&self) -> io::Result<()> {
+        moto_rt::fs::flush(self.as_raw_fd()).map_err(map_motor_error)
+    }
+
+    pub fn seek(&self, pos: SeekFrom) -> io::Result<u64> {
+        match pos {
+            SeekFrom::Start(offset) => {
+                moto_rt::fs::seek(self.as_raw_fd(), offset as i64, moto_rt::fs::SEEK_SET)
+                    .map_err(map_motor_error)
+            }
+            SeekFrom::End(offset) => {
+                moto_rt::fs::seek(self.as_raw_fd(), offset, moto_rt::fs::SEEK_END)
+                    .map_err(map_motor_error)
+            }
+            SeekFrom::Current(offset) => {
+                moto_rt::fs::seek(self.as_raw_fd(), offset, moto_rt::fs::SEEK_CUR)
+                    .map_err(map_motor_error)
+            }
+        }
+    }
+
+    pub fn tell(&self) -> io::Result<u64> {
+        self.seek(SeekFrom::Current(0))
+    }
+
+    pub fn duplicate(&self) -> io::Result<File> {
+        moto_rt::fs::duplicate(self.as_raw_fd())
+            .map(|fd| unsafe { Self::from_raw_fd(fd) })
+            .map_err(map_motor_error)
+    }
+
+    pub fn set_permissions(&self, perm: FilePermissions) -> io::Result<()> {
+        moto_rt::fs::set_file_perm(self.as_raw_fd(), perm.rt_perm).map_err(map_motor_error)
+    }
+
+    pub fn set_times(&self, _times: FileTimes) -> io::Result<()> {
+        unsupported() // Deliberately unsupported on Motor OS.
+    }
+
+    pub fn lock(&self) -> io::Result<()> {
+        unsupported()
+    }
+
+    pub fn lock_shared(&self) -> io::Result<()> {
+        unsupported()
+    }
+
+    pub fn try_lock(&self) -> Result<(), crate::fs::TryLockError> {
+        Err(crate::fs::TryLockError::Error(io::Error::from(io::ErrorKind::Unsupported)))
+    }
+
+    pub fn try_lock_shared(&self) -> Result<(), crate::fs::TryLockError> {
+        Err(crate::fs::TryLockError::Error(io::Error::from(io::ErrorKind::Unsupported)))
+    }
+
+    pub fn unlock(&self) -> io::Result<()> {
+        unsupported()
+    }
+
+    pub fn size(&self) -> Option<io::Result<u64>> {
+        None
+    }
+}
+
+#[derive(Debug)]
+pub struct DirBuilder {}
+
+impl DirBuilder {
+    pub fn new() -> DirBuilder {
+        DirBuilder {}
+    }
+
+    pub fn mkdir(&self, path: &Path) -> io::Result<()> {
+        let path = path.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+        moto_rt::fs::mkdir(path).map_err(map_motor_error)
+    }
+}
+
+pub fn unlink(path: &Path) -> io::Result<()> {
+    let path = path.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    moto_rt::fs::unlink(path).map_err(map_motor_error)
+}
+
+pub fn rename(old: &Path, new: &Path) -> io::Result<()> {
+    let old = old.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    let new = new.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    moto_rt::fs::rename(old, new).map_err(map_motor_error)
+}
+
+pub fn rmdir(path: &Path) -> io::Result<()> {
+    let path = path.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    moto_rt::fs::rmdir(path).map_err(map_motor_error)
+}
+
+pub fn remove_dir_all(path: &Path) -> io::Result<()> {
+    let path = path.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    moto_rt::fs::rmdir_all(path).map_err(map_motor_error)
+}
+
+pub fn set_perm(path: &Path, perm: FilePermissions) -> io::Result<()> {
+    let path = path.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    moto_rt::fs::set_perm(path, perm.rt_perm).map_err(map_motor_error)
+}
+
+pub fn readlink(_p: &Path) -> io::Result<PathBuf> {
+    unsupported()
+}
+
+pub fn symlink(_original: &Path, _link: &Path) -> io::Result<()> {
+    unsupported()
+}
+
+pub fn link(_src: &Path, _dst: &Path) -> io::Result<()> {
+    unsupported()
+}
+
+pub fn stat(path: &Path) -> io::Result<FileAttr> {
+    let path = path.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    let inner = moto_rt::fs::stat(path).map_err(map_motor_error)?;
+    Ok(FileAttr { inner })
+}
+
+pub fn lstat(path: &Path) -> io::Result<FileAttr> {
+    stat(path)
+}
+
+pub fn canonicalize(path: &Path) -> io::Result<PathBuf> {
+    let path = path.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    let path = moto_rt::fs::canonicalize(path).map_err(map_motor_error)?;
+    Ok(path.into())
+}
+
+pub fn copy(from: &Path, to: &Path) -> io::Result<u64> {
+    let from = from.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    let to = to.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    moto_rt::fs::copy(from, to).map_err(map_motor_error)
+}
+
+#[derive(Debug)]
+pub struct ReadDir {
+    rt_fd: moto_rt::RtFd,
+    path: String,
+}
+
+impl Drop for ReadDir {
+    fn drop(&mut self) {
+        moto_rt::fs::closedir(self.rt_fd).unwrap();
+    }
+}
+
+pub fn readdir(path: &Path) -> io::Result<ReadDir> {
+    let path = path.to_str().ok_or(io::Error::from(io::ErrorKind::InvalidFilename))?;
+    Ok(ReadDir {
+        rt_fd: moto_rt::fs::opendir(path).map_err(map_motor_error)?,
+        path: path.to_owned(),
+    })
+}
+
+impl Iterator for ReadDir {
+    type Item = io::Result<DirEntry>;
+
+    fn next(&mut self) -> Option<io::Result<DirEntry>> {
+        match moto_rt::fs::readdir(self.rt_fd).map_err(map_motor_error) {
+            Ok(maybe_item) => match maybe_item {
+                Some(inner) => Some(Ok(DirEntry { inner, parent_path: self.path.clone() })),
+                None => None,
+            },
+            Err(err) => Some(Err(err)),
+        }
+    }
+}
+
+pub struct DirEntry {
+    parent_path: String,
+    inner: moto_rt::fs::DirEntry,
+}
+
+impl DirEntry {
+    fn filename(&self) -> &str {
+        core::str::from_utf8(unsafe {
+            core::slice::from_raw_parts(self.inner.fname.as_ptr(), self.inner.fname_size as usize)
+        })
+        .unwrap()
+    }
+
+    pub fn path(&self) -> PathBuf {
+        let mut path = self.parent_path.clone();
+        path.push_str("/");
+        path.push_str(self.filename());
+        path.into()
+    }
+
+    pub fn file_name(&self) -> OsString {
+        self.filename().to_owned().into()
+    }
+
+    pub fn metadata(&self) -> io::Result<FileAttr> {
+        Ok(FileAttr { inner: self.inner.attr })
+    }
+
+    pub fn file_type(&self) -> io::Result<FileType> {
+        Ok(FileType { rt_filetype: self.inner.attr.file_type })
+    }
+}
+
+impl AsInner<FileDesc> for File {
+    #[inline]
+    fn as_inner(&self) -> &FileDesc {
+        &self.0
+    }
+}
+
+impl AsInnerMut<FileDesc> for File {
+    #[inline]
+    fn as_inner_mut(&mut self) -> &mut FileDesc {
+        &mut self.0
+    }
+}
+
+impl IntoInner<FileDesc> for File {
+    fn into_inner(self) -> FileDesc {
+        self.0
+    }
+}
+
+impl FromInner<FileDesc> for File {
+    fn from_inner(file_desc: FileDesc) -> Self {
+        Self(file_desc)
+    }
+}
+
+impl AsFd for File {
+    #[inline]
+    fn as_fd(&self) -> BorrowedFd<'_> {
+        self.0.as_fd()
+    }
+}
+
+impl AsRawFd for File {
+    #[inline]
+    fn as_raw_fd(&self) -> RawFd {
+        self.0.as_raw_fd()
+    }
+}
+
+impl IntoRawFd for File {
+    fn into_raw_fd(self) -> RawFd {
+        self.0.into_raw_fd()
+    }
+}
+
+impl FromRawFd for File {
+    unsafe fn from_raw_fd(raw_fd: RawFd) -> Self {
+        unsafe { Self(FromRawFd::from_raw_fd(raw_fd)) }
+    }
+}
diff --git a/std/src/sys/io/is_terminal/motor.rs b/std/src/sys/io/is_terminal/motor.rs
new file mode 100644
index 0000000000000..0b70299adaaa2
--- /dev/null
+++ b/std/src/sys/io/is_terminal/motor.rs
@@ -0,0 +1,6 @@
+use crate::os::fd::{AsFd, AsRawFd};
+
+pub fn is_terminal(fd: &impl AsFd) -> bool {
+    let fd = fd.as_fd();
+    moto_rt::fs::is_terminal(fd.as_raw_fd())
+}
diff --git a/std/src/sys/io/mod.rs b/std/src/sys/io/mod.rs
index fe8ec1dbb7325..0916eda1c06a5 100644
--- a/std/src/sys/io/mod.rs
+++ b/std/src/sys/io/mod.rs
@@ -39,6 +39,10 @@ mod is_terminal {
         mod hermit;
         pub use hermit::*;
     }
+    target_os = "motor" => {
+        mod motor;
+        pub use motor::*;
+    }
     _ => {
         mod unsupported;
         pub use unsupported::*;
diff --git a/std/src/sys/net/connection/mod.rs b/std/src/sys/net/connection/mod.rs
index 41e7159f909ae..2f064914a8317 100644
--- a/std/src/sys/net/connection/mod.rs
+++ b/std/src/sys/net/connection/mod.rs
@@ -17,6 +17,10 @@ cfg_select!
{ mod wasip1; pub use wasip1::*; } + target_os = "motor" => { + mod motor; + pub use motor::*; + } target_os = "xous" => { mod xous; pub use xous::*; diff --git a/std/src/sys/net/connection/motor.rs b/std/src/sys/net/connection/motor.rs new file mode 100644 index 0000000000000..e9bf29e34f90c --- /dev/null +++ b/std/src/sys/net/connection/motor.rs @@ -0,0 +1,521 @@ +pub use moto_rt::netc; + +use crate::io::{self, BorrowedCursor, IoSlice, IoSliceMut}; +use crate::net::SocketAddr::{V4, V6}; +use crate::net::{Ipv4Addr, Ipv6Addr, Shutdown, SocketAddr, ToSocketAddrs}; +use crate::os::fd::{AsFd, AsRawFd, BorrowedFd, FromRawFd, IntoRawFd, RawFd}; +use crate::sys::fd::FileDesc; +use crate::sys::map_motor_error; +use crate::sys_common::{AsInner, FromInner, IntoInner}; +use crate::time::Duration; + +// We want to re-use as much of Rust's stdlib code as possible, +// and most of it is unixy, but with a lot of nesting. +#[derive(Debug)] +pub struct Socket(FileDesc); + +#[derive(Debug)] +pub struct TcpStream { + inner: Socket, +} + +impl TcpStream { + pub fn socket(&self) -> &Socket { + &self.inner + } + + pub fn into_socket(self) -> Socket { + self.inner + } + + pub fn connect(addr: A) -> io::Result { + let addr = into_netc(&addr.to_socket_addrs()?.next().unwrap()); + moto_rt::net::tcp_connect(&addr, Duration::MAX, false) + .map(|fd| Self { inner: unsafe { Socket::from_raw_fd(fd) } }) + .map_err(map_motor_error) + } + + pub fn connect_timeout(addr: &SocketAddr, timeout: Duration) -> io::Result { + let addr = into_netc(addr); + moto_rt::net::tcp_connect(&addr, timeout, false) + .map(|fd| Self { inner: unsafe { Socket::from_raw_fd(fd) } }) + .map_err(map_motor_error) + } + + pub fn set_read_timeout(&self, timeout: Option) -> io::Result<()> { + moto_rt::net::set_read_timeout(self.inner.as_raw_fd(), timeout).map_err(map_motor_error) + } + + pub fn set_write_timeout(&self, timeout: Option) -> io::Result<()> { + moto_rt::net::set_write_timeout(self.inner.as_raw_fd(), timeout).map_err(map_motor_error) + } + + pub fn read_timeout(&self) -> io::Result> { + moto_rt::net::read_timeout(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn write_timeout(&self) -> io::Result> { + moto_rt::net::write_timeout(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn peek(&self, buf: &mut [u8]) -> io::Result { + moto_rt::net::peek(self.inner.as_raw_fd(), buf).map_err(map_motor_error) + } + + pub fn read(&self, buf: &mut [u8]) -> io::Result { + moto_rt::fs::read(self.inner.as_raw_fd(), buf).map_err(map_motor_error) + } + + pub fn read_buf(&self, cursor: BorrowedCursor<'_>) -> io::Result<()> { + crate::io::default_read_buf(|buf| self.read(buf), cursor) + } + + pub fn read_vectored(&self, bufs: &mut [IoSliceMut<'_>]) -> io::Result { + let bufs: &mut [&mut [u8]] = unsafe { core::mem::transmute(bufs) }; + moto_rt::fs::read_vectored(self.inner.as_raw_fd(), bufs).map_err(map_motor_error) + } + + pub fn is_read_vectored(&self) -> bool { + true + } + + pub fn write(&self, buf: &[u8]) -> io::Result { + moto_rt::fs::write(self.inner.as_raw_fd(), buf).map_err(map_motor_error) + } + + pub fn write_vectored(&self, bufs: &[IoSlice<'_>]) -> io::Result { + let bufs: &[&[u8]] = unsafe { core::mem::transmute(bufs) }; + moto_rt::fs::write_vectored(self.inner.as_raw_fd(), bufs).map_err(map_motor_error) + } + + pub fn is_write_vectored(&self) -> bool { + true + } + + pub fn peer_addr(&self) -> io::Result { + moto_rt::net::peer_addr(self.inner.as_raw_fd()) + .map(|addr| from_netc(&addr)) + .map_err(map_motor_error) + } + + 
pub fn socket_addr(&self) -> io::Result { + moto_rt::net::socket_addr(self.inner.as_raw_fd()) + .map(|addr| from_netc(&addr)) + .map_err(map_motor_error) + } + + pub fn shutdown(&self, shutdown: Shutdown) -> io::Result<()> { + let shutdown = match shutdown { + Shutdown::Read => moto_rt::net::SHUTDOWN_READ, + Shutdown::Write => moto_rt::net::SHUTDOWN_WRITE, + Shutdown::Both => moto_rt::net::SHUTDOWN_READ | moto_rt::net::SHUTDOWN_WRITE, + }; + + moto_rt::net::shutdown(self.inner.as_raw_fd(), shutdown).map_err(map_motor_error) + } + + pub fn duplicate(&self) -> io::Result { + moto_rt::fs::duplicate(self.inner.as_raw_fd()) + .map(|fd| Self { inner: unsafe { Socket::from_raw_fd(fd) } }) + .map_err(map_motor_error) + } + + pub fn set_linger(&self, timeout: Option) -> io::Result<()> { + moto_rt::net::set_linger(self.inner.as_raw_fd(), timeout).map_err(map_motor_error) + } + + pub fn linger(&self) -> io::Result> { + moto_rt::net::linger(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn set_nodelay(&self, nodelay: bool) -> io::Result<()> { + moto_rt::net::set_nodelay(self.inner.as_raw_fd(), nodelay).map_err(map_motor_error) + } + + pub fn nodelay(&self) -> io::Result { + moto_rt::net::nodelay(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn set_ttl(&self, ttl: u32) -> io::Result<()> { + moto_rt::net::set_ttl(self.inner.as_raw_fd(), ttl).map_err(map_motor_error) + } + + pub fn ttl(&self) -> io::Result { + moto_rt::net::ttl(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn take_error(&self) -> io::Result> { + let e = moto_rt::net::take_error(self.inner.as_raw_fd()).map_err(map_motor_error)?; + if e == moto_rt::E_OK { Ok(None) } else { Ok(Some(map_motor_error(e))) } + } + + pub fn set_nonblocking(&self, nonblocking: bool) -> io::Result<()> { + moto_rt::net::set_nonblocking(self.inner.as_raw_fd(), nonblocking).map_err(map_motor_error) + } +} + +#[derive(Debug)] +pub struct TcpListener { + inner: Socket, +} + +impl TcpListener { + #[inline] + pub fn socket(&self) -> &Socket { + &self.inner + } + + pub fn into_socket(self) -> Socket { + self.inner + } + + pub fn bind(addr: A) -> io::Result { + let addr = into_netc(&addr.to_socket_addrs()?.next().unwrap()); + moto_rt::net::bind(moto_rt::net::PROTO_TCP, &addr) + .map(|fd| Self { inner: unsafe { Socket::from_raw_fd(fd) } }) + .map_err(map_motor_error) + } + + pub fn socket_addr(&self) -> io::Result { + moto_rt::net::socket_addr(self.inner.as_raw_fd()) + .map(|addr| from_netc(&addr)) + .map_err(map_motor_error) + } + + pub fn accept(&self) -> io::Result<(TcpStream, SocketAddr)> { + moto_rt::net::accept(self.inner.as_raw_fd()) + .map(|(fd, addr)| { + (TcpStream { inner: unsafe { Socket::from_raw_fd(fd) } }, from_netc(&addr)) + }) + .map_err(map_motor_error) + } + + pub fn duplicate(&self) -> io::Result { + moto_rt::fs::duplicate(self.inner.as_raw_fd()) + .map(|fd| Self { inner: unsafe { Socket::from_raw_fd(fd) } }) + .map_err(map_motor_error) + } + + pub fn set_ttl(&self, ttl: u32) -> io::Result<()> { + moto_rt::net::set_ttl(self.inner.as_raw_fd(), ttl).map_err(map_motor_error) + } + + pub fn ttl(&self) -> io::Result { + moto_rt::net::ttl(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn set_only_v6(&self, only_v6: bool) -> io::Result<()> { + moto_rt::net::set_only_v6(self.inner.as_raw_fd(), only_v6).map_err(map_motor_error) + } + + pub fn only_v6(&self) -> io::Result { + moto_rt::net::only_v6(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn take_error(&self) -> io::Result> { + let e = 
moto_rt::net::take_error(self.inner.as_raw_fd()).map_err(map_motor_error)?; + if e == moto_rt::E_OK { Ok(None) } else { Ok(Some(map_motor_error(e))) } + } + + pub fn set_nonblocking(&self, nonblocking: bool) -> io::Result<()> { + moto_rt::net::set_nonblocking(self.inner.as_raw_fd(), nonblocking).map_err(map_motor_error) + } +} + +#[derive(Debug)] +pub struct UdpSocket { + inner: Socket, +} + +impl UdpSocket { + pub fn socket(&self) -> &Socket { + &self.inner + } + + pub fn into_socket(self) -> Socket { + self.inner + } + + pub fn bind(addr: A) -> io::Result { + let addr = into_netc(&addr.to_socket_addrs()?.next().unwrap()); + moto_rt::net::bind(moto_rt::net::PROTO_UDP, &addr) + .map(|fd| Self { inner: unsafe { Socket::from_raw_fd(fd) } }) + .map_err(map_motor_error) + } + + pub fn peer_addr(&self) -> io::Result { + moto_rt::net::peer_addr(self.inner.as_raw_fd()) + .map(|addr| from_netc(&addr)) + .map_err(map_motor_error) + } + + pub fn socket_addr(&self) -> io::Result { + moto_rt::net::socket_addr(self.inner.as_raw_fd()) + .map(|addr| from_netc(&addr)) + .map_err(map_motor_error) + } + + pub fn recv_from(&self, buf: &mut [u8]) -> io::Result<(usize, SocketAddr)> { + moto_rt::net::udp_recv_from(self.inner.as_raw_fd(), buf) + .map(|(sz, addr)| (sz, from_netc(&addr))) + .map_err(map_motor_error) + } + + pub fn peek_from(&self, buf: &mut [u8]) -> io::Result<(usize, SocketAddr)> { + moto_rt::net::udp_peek_from(self.inner.as_raw_fd(), buf) + .map(|(sz, addr)| (sz, from_netc(&addr))) + .map_err(map_motor_error) + } + + pub fn send_to(&self, buf: &[u8], addr: &SocketAddr) -> io::Result { + let addr = into_netc(addr); + moto_rt::net::udp_send_to(self.inner.as_raw_fd(), buf, &addr).map_err(map_motor_error) + } + + pub fn duplicate(&self) -> io::Result { + moto_rt::fs::duplicate(self.inner.as_raw_fd()) + .map(|fd| Self { inner: unsafe { Socket::from_raw_fd(fd) } }) + .map_err(map_motor_error) + } + + pub fn set_read_timeout(&self, timeout: Option) -> io::Result<()> { + moto_rt::net::set_read_timeout(self.inner.as_raw_fd(), timeout).map_err(map_motor_error) + } + + pub fn set_write_timeout(&self, timeout: Option) -> io::Result<()> { + moto_rt::net::set_write_timeout(self.inner.as_raw_fd(), timeout).map_err(map_motor_error) + } + + pub fn read_timeout(&self) -> io::Result> { + moto_rt::net::read_timeout(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn write_timeout(&self) -> io::Result> { + moto_rt::net::write_timeout(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn set_broadcast(&self, broadcast: bool) -> io::Result<()> { + moto_rt::net::set_udp_broadcast(self.inner.as_raw_fd(), broadcast).map_err(map_motor_error) + } + + pub fn broadcast(&self) -> io::Result { + moto_rt::net::udp_broadcast(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn set_multicast_loop_v4(&self, val: bool) -> io::Result<()> { + moto_rt::net::set_udp_multicast_loop_v4(self.inner.as_raw_fd(), val) + .map_err(map_motor_error) + } + + pub fn multicast_loop_v4(&self) -> io::Result { + moto_rt::net::udp_multicast_loop_v4(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn set_multicast_ttl_v4(&self, val: u32) -> io::Result<()> { + moto_rt::net::set_udp_multicast_ttl_v4(self.inner.as_raw_fd(), val).map_err(map_motor_error) + } + + pub fn multicast_ttl_v4(&self) -> io::Result { + moto_rt::net::udp_multicast_ttl_v4(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn set_multicast_loop_v6(&self, val: bool) -> io::Result<()> { + 
moto_rt::net::set_udp_multicast_loop_v6(self.inner.as_raw_fd(), val) + .map_err(map_motor_error) + } + + pub fn multicast_loop_v6(&self) -> io::Result { + moto_rt::net::udp_multicast_loop_v6(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn join_multicast_v4(&self, addr: &Ipv4Addr, iface: &Ipv4Addr) -> io::Result<()> { + let addr = (*addr).into(); + let iface = (*iface).into(); + moto_rt::net::join_udp_multicast_v4(self.inner.as_raw_fd(), &addr, &iface) + .map_err(map_motor_error) + } + + pub fn join_multicast_v6(&self, addr: &Ipv6Addr, iface: u32) -> io::Result<()> { + let addr = (*addr).into(); + moto_rt::net::join_udp_multicast_v6(self.inner.as_raw_fd(), &addr, iface) + .map_err(map_motor_error) + } + + pub fn leave_multicast_v4(&self, addr: &Ipv4Addr, iface: &Ipv4Addr) -> io::Result<()> { + let addr = (*addr).into(); + let iface = (*iface).into(); + moto_rt::net::leave_udp_multicast_v4(self.inner.as_raw_fd(), &addr, &iface) + .map_err(map_motor_error) + } + + pub fn leave_multicast_v6(&self, addr: &Ipv6Addr, iface: u32) -> io::Result<()> { + let addr = (*addr).into(); + moto_rt::net::leave_udp_multicast_v6(self.inner.as_raw_fd(), &addr, iface) + .map_err(map_motor_error) + } + + pub fn set_ttl(&self, ttl: u32) -> io::Result<()> { + moto_rt::net::set_ttl(self.inner.as_raw_fd(), ttl).map_err(map_motor_error) + } + + pub fn ttl(&self) -> io::Result { + moto_rt::net::ttl(self.inner.as_raw_fd()).map_err(map_motor_error) + } + + pub fn take_error(&self) -> io::Result> { + moto_rt::net::take_error(self.inner.as_raw_fd()) + .map(|e| match e { + moto_rt::E_OK => None, + e => Some(map_motor_error(e)), + }) + .map_err(map_motor_error) + } + + pub fn set_nonblocking(&self, nonblocking: bool) -> io::Result<()> { + moto_rt::net::set_nonblocking(self.inner.as_raw_fd(), nonblocking).map_err(map_motor_error) + } + + pub fn recv(&self, buf: &mut [u8]) -> io::Result { + moto_rt::fs::read(self.inner.as_raw_fd(), buf).map_err(map_motor_error) + } + + pub fn peek(&self, buf: &mut [u8]) -> io::Result { + moto_rt::net::peek(self.inner.as_raw_fd(), buf).map_err(map_motor_error) + } + + pub fn send(&self, buf: &[u8]) -> io::Result { + moto_rt::fs::write(self.inner.as_raw_fd(), buf).map_err(map_motor_error) + } + + pub fn connect(&self, addr: A) -> io::Result<()> { + let addr = into_netc(&addr.to_socket_addrs()?.next().unwrap()); + moto_rt::net::udp_connect(self.inner.as_raw_fd(), &addr).map_err(map_motor_error) + } +} + +pub struct LookupHost { + addresses: alloc::collections::VecDeque, +} + +pub fn lookup_host(host: &str, port: u16) -> io::Result { + let (_port, addresses) = moto_rt::net::lookup_host(host, port).map_err(map_motor_error)?; + Ok(LookupHost { addresses }) +} + +impl Iterator for LookupHost { + type Item = SocketAddr; + fn next(&mut self) -> Option { + self.addresses.pop_front().map(|addr| from_netc(&addr)) + } +} + +impl TryFrom<&str> for LookupHost { + type Error = io::Error; + + fn try_from(host_port: &str) -> io::Result { + let (host, port_str) = host_port + .rsplit_once(':') + .ok_or(moto_rt::E_INVALID_ARGUMENT) + .map_err(map_motor_error)?; + let port: u16 = + port_str.parse().map_err(|_| moto_rt::E_INVALID_ARGUMENT).map_err(map_motor_error)?; + (host, port).try_into() + } +} + +impl<'a> TryFrom<(&'a str, u16)> for LookupHost { + type Error = io::Error; + + fn try_from(host_port: (&'a str, u16)) -> io::Result { + let (host, port) = host_port; + + let (_port, addresses) = moto_rt::net::lookup_host(host, port).map_err(map_motor_error)?; + Ok(LookupHost { addresses }) + } +} + +fn 
into_netc(addr: &SocketAddr) -> netc::sockaddr { + match addr { + V4(addr4) => netc::sockaddr { v4: (*addr4).into() }, + V6(addr6) => netc::sockaddr { v6: (*addr6).into() }, + } +} + +fn from_netc(addr: &netc::sockaddr) -> SocketAddr { + // SAFETY: all variants of union netc::sockaddr have `sin_family` at the same offset. + let family = unsafe { addr.v4.sin_family }; + match family { + netc::AF_INET => SocketAddr::V4(crate::net::SocketAddrV4::from(unsafe { addr.v4 })), + netc::AF_INET6 => SocketAddr::V6(crate::net::SocketAddrV6::from(unsafe { addr.v6 })), + _ => panic!("bad sin_family {family}"), + } +} + +impl AsInner for Socket { + #[inline] + fn as_inner(&self) -> &FileDesc { + &self.0 + } +} + +impl IntoInner for Socket { + fn into_inner(self) -> FileDesc { + self.0 + } +} + +impl FromInner for Socket { + fn from_inner(file_desc: FileDesc) -> Self { + Self(file_desc) + } +} + +impl AsFd for Socket { + fn as_fd(&self) -> BorrowedFd<'_> { + self.0.as_fd() + } +} + +impl AsRawFd for Socket { + #[inline] + fn as_raw_fd(&self) -> RawFd { + self.0.as_raw_fd() + } +} + +impl IntoRawFd for Socket { + fn into_raw_fd(self) -> RawFd { + self.0.into_raw_fd() + } +} + +impl FromRawFd for Socket { + unsafe fn from_raw_fd(raw_fd: RawFd) -> Self { + Self(FromRawFd::from_raw_fd(raw_fd)) + } +} + +impl AsInner for TcpStream { + #[inline] + fn as_inner(&self) -> &Socket { + &self.inner + } +} + +impl FromInner for TcpStream { + fn from_inner(socket: Socket) -> TcpStream { + TcpStream { inner: socket } + } +} + +impl FromInner for TcpListener { + fn from_inner(socket: Socket) -> TcpListener { + TcpListener { inner: socket } + } +} + +impl FromInner for UdpSocket { + fn from_inner(socket: Socket) -> UdpSocket { + UdpSocket { inner: socket } + } +} diff --git a/std/src/sys/pal/mod.rs b/std/src/sys/pal/mod.rs index 9e964540a87c1..e11df38a8ee68 100644 --- a/std/src/sys/pal/mod.rs +++ b/std/src/sys/pal/mod.rs @@ -41,6 +41,10 @@ cfg_select! { mod hermit; pub use self::hermit::*; } + target_os = "motor" => { + mod motor; + pub use self::motor::*; + } target_os = "trusty" => { mod trusty; pub use self::trusty::*; diff --git a/std/src/sys/pal/motor/mod.rs b/std/src/sys/pal/motor/mod.rs new file mode 100644 index 0000000000000..c64f8ff7a8a83 --- /dev/null +++ b/std/src/sys/pal/motor/mod.rs @@ -0,0 +1,77 @@ +#![allow(unsafe_op_in_unsafe_fn)] + +pub mod os; +pub mod pipe; +pub mod time; + +pub use moto_rt::futex; + +use crate::io as std_io; +use crate::sys::RawOsError; + +pub(crate) fn map_motor_error(err: moto_rt::ErrorCode) -> crate::io::Error { + crate::io::Error::from_raw_os_error(err.into()) +} + +#[cfg(not(test))] +#[unsafe(no_mangle)] +pub extern "C" fn motor_start() -> ! { + // Initialize the runtime. + moto_rt::start(); + + // Call main. + unsafe extern "C" { + fn main(_: isize, _: *const *const u8, _: u8) -> i32; + } + let result = unsafe { main(0, core::ptr::null(), 0) }; + + // Terminate the process. + moto_rt::process::exit(result) +} + +// SAFETY: must be called only once during runtime initialization. +// NOTE: Motor OS uses moto_rt::start() to initialize runtime (see above). +pub unsafe fn init(_argc: isize, _argv: *const *const u8, _sigpipe: u8) {} + +// SAFETY: must be called only once during runtime cleanup. +// NOTE: this is not guaranteed to run, for example when the program aborts. 
+pub unsafe fn cleanup() {}
+
+pub fn unsupported<T>() -> std_io::Result<T> {
+    Err(unsupported_err())
+}
+
+pub fn unsupported_err() -> std_io::Error {
+    std_io::Error::UNSUPPORTED_PLATFORM
+}
+
+pub fn is_interrupted(_code: RawOsError) -> bool {
+    false // Motor OS doesn't have signals.
+}
+
+pub fn decode_error_kind(code: RawOsError) -> crate::io::ErrorKind {
+    use moto_rt::error::*;
+    use std_io::ErrorKind;
+
+    if code < 0 || code > u16::MAX.into() {
+        return std_io::ErrorKind::Uncategorized;
+    }
+
+    match code as moto_rt::ErrorCode /* u16 */ {
+        E_ALREADY_IN_USE => ErrorKind::AlreadyExists,
+        E_INVALID_FILENAME => ErrorKind::InvalidFilename,
+        E_NOT_FOUND => ErrorKind::NotFound,
+        E_TIMED_OUT => ErrorKind::TimedOut,
+        E_NOT_IMPLEMENTED => ErrorKind::Unsupported,
+        E_FILE_TOO_LARGE => ErrorKind::FileTooLarge,
+        E_UNEXPECTED_EOF => ErrorKind::UnexpectedEof,
+        E_INVALID_ARGUMENT => ErrorKind::InvalidInput,
+        E_NOT_READY => ErrorKind::WouldBlock,
+        E_NOT_CONNECTED => ErrorKind::NotConnected,
+        _ => crate::io::ErrorKind::Uncategorized,
+    }
+}
+
+pub fn abort_internal() -> ! {
+    core::intrinsics::abort();
+}
diff --git a/std/src/sys/pal/motor/os.rs b/std/src/sys/pal/motor/os.rs
new file mode 100644
index 0000000000000..052e3b238b6af
--- /dev/null
+++ b/std/src/sys/pal/motor/os.rs
@@ -0,0 +1,100 @@
+use super::map_motor_error;
+use crate::error::Error as StdError;
+use crate::ffi::{OsStr, OsString};
+use crate::marker::PhantomData;
+use crate::os::motor::ffi::OsStrExt;
+use crate::path::{self, PathBuf};
+use crate::sys::RawOsError;
+use crate::{fmt, io};
+
+pub fn errno() -> RawOsError {
+    // Not used on Motor OS because it would be ambiguous: Motor OS
+    // is micro-kernel-based, and I/O happens via a shared-memory
+    // ring buffer, so an I/O operation that is a syscall on Unix
+    // may involve no syscalls on Motor OS at all, or a syscall
+    // that e.g. waits for a notification from the I/O driver
+    // (sys-io). The wait syscall may succeed while the driver
+    // reports an I/O error, or it may deliver a batch of results
+    // for several I/O operations, some successful and some
+    // not.
+    //
+    // Also, I/O operations in a Motor OS process are handled by a
+    // separate runtime background/I/O thread, so it is hard
+    // to define what "the last system error in the current thread"
+    // actually means.
+ moto_rt::E_UNKNOWN.into() +} + +pub fn error_string(errno: RawOsError) -> String { + let error_code: moto_rt::ErrorCode = match errno { + x if x < 0 => moto_rt::E_UNKNOWN, + x if x > u16::MAX.into() => moto_rt::E_UNKNOWN, + x => x as moto_rt::ErrorCode, /* u16 */ + }; + format!("{}", moto_rt::Error::from(error_code)) +} + +pub fn getcwd() -> io::Result { + moto_rt::fs::getcwd().map(PathBuf::from).map_err(map_motor_error) +} + +pub fn chdir(path: &path::Path) -> io::Result<()> { + moto_rt::fs::chdir(path.as_os_str().as_str()).map_err(map_motor_error) +} + +pub struct SplitPaths<'a>(!, PhantomData<&'a ()>); + +pub fn split_paths(_unparsed: &OsStr) -> SplitPaths<'_> { + panic!("unsupported") +} + +impl<'a> Iterator for SplitPaths<'a> { + type Item = PathBuf; + fn next(&mut self) -> Option { + self.0 + } +} + +#[derive(Debug)] +pub struct JoinPathsError; + +pub fn join_paths(_paths: I) -> Result +where + I: Iterator, + T: AsRef, +{ + Err(JoinPathsError) +} + +impl fmt::Display for JoinPathsError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + "not supported on this platform yet".fmt(f) + } +} + +impl StdError for JoinPathsError { + #[allow(deprecated)] + fn description(&self) -> &str { + "not supported on this platform yet" + } +} + +pub fn current_exe() -> io::Result { + moto_rt::process::current_exe().map(PathBuf::from).map_err(map_motor_error) +} + +pub fn temp_dir() -> PathBuf { + PathBuf::from(moto_rt::fs::TEMP_DIR) +} + +pub fn home_dir() -> Option { + None +} + +pub fn exit(code: i32) -> ! { + moto_rt::process::exit(code) +} + +pub fn getpid() -> u32 { + panic!("Pids on Motor OS are u64.") +} diff --git a/std/src/sys/pal/motor/pipe.rs b/std/src/sys/pal/motor/pipe.rs new file mode 100644 index 0000000000000..d3be6ddf1573e --- /dev/null +++ b/std/src/sys/pal/motor/pipe.rs @@ -0,0 +1,121 @@ +use crate::io::{self, BorrowedCursor, IoSlice, IoSliceMut}; +use crate::os::fd::{AsFd, AsRawFd, BorrowedFd, FromRawFd, IntoRawFd, OwnedFd, RawFd}; +use crate::sys::fd::FileDesc; +use crate::sys::map_motor_error; +use crate::sys_common::{FromInner, IntoInner}; + +#[derive(Debug)] +pub struct AnonPipe(FileDesc); + +impl From for AnonPipe { + fn from(rt_fd: moto_rt::RtFd) -> AnonPipe { + unsafe { AnonPipe::from_raw_fd(rt_fd) } + } +} + +impl AnonPipe { + pub fn read(&self, buf: &mut [u8]) -> io::Result { + moto_rt::fs::read(self.as_raw_fd(), buf).map_err(map_motor_error) + } + + pub fn read_buf(&self, cursor: BorrowedCursor<'_>) -> io::Result<()> { + crate::io::default_read_buf(|buf| self.read(buf), cursor) + } + + pub fn read_vectored(&self, bufs: &mut [IoSliceMut<'_>]) -> io::Result { + crate::io::default_read_vectored(|b| self.read(b), bufs) + } + + pub fn is_read_vectored(&self) -> bool { + false + } + + pub fn write(&self, buf: &[u8]) -> io::Result { + moto_rt::fs::write(self.as_raw_fd(), buf).map_err(map_motor_error) + } + + pub fn write_vectored(&self, bufs: &[IoSlice<'_>]) -> io::Result { + crate::io::default_write_vectored(|b| self.write(b), bufs) + } + + pub fn is_write_vectored(&self) -> bool { + false + } + + pub fn read_to_end(&self, buf: &mut Vec) -> io::Result { + let mut temp_vec = Vec::new(); + let mut size = 0_usize; + loop { + temp_vec.resize(256, 0_u8); + match self.read(&mut temp_vec[..]) { + Ok(sz) => { + if sz == 0 { + return Ok(size); + } + size += sz; + temp_vec.truncate(sz); + buf.append(&mut temp_vec); + } + Err(err) => { + if size != 0 { + return Ok(size); + } else { + return Err(err); + } + } + } + } + } +} + +impl AsRawFd for AnonPipe { + fn 
as_raw_fd(&self) -> RawFd { + self.0.as_raw_fd() + } +} + +impl FromRawFd for AnonPipe { + unsafe fn from_raw_fd(fd: RawFd) -> Self { + let desc = FileDesc::from_raw_fd(fd); + Self(desc) + } +} + +impl IntoRawFd for AnonPipe { + fn into_raw_fd(self) -> RawFd { + self.0.into_raw_fd() + } +} + +impl AsFd for AnonPipe { + fn as_fd(&self) -> BorrowedFd<'_> { + self.0.as_fd() + } +} + +impl IntoInner for AnonPipe { + fn into_inner(self) -> OwnedFd { + self.0.into_inner() + } +} + +impl IntoInner for AnonPipe { + fn into_inner(self) -> FileDesc { + self.0 + } +} + +impl FromInner for AnonPipe { + fn from_inner(owned_fd: OwnedFd) -> Self { + Self(FileDesc::from_inner(owned_fd)) + } +} + +pub fn read2(_p1: AnonPipe, _v1: &mut Vec, _p2: AnonPipe, _v2: &mut Vec) -> io::Result<()> { + Err(io::Error::from_raw_os_error(moto_rt::E_NOT_IMPLEMENTED.into())) +} + +#[inline] +pub fn anon_pipe() -> io::Result<(AnonPipe, AnonPipe)> { + Err(io::Error::UNSUPPORTED_PLATFORM) +} diff --git a/std/src/sys/pal/motor/time.rs b/std/src/sys/pal/motor/time.rs new file mode 100644 index 0000000000000..e917fd466c2e4 --- /dev/null +++ b/std/src/sys/pal/motor/time.rs @@ -0,0 +1 @@ +pub use moto_rt::time::{Instant, SystemTime, UNIX_EPOCH}; diff --git a/std/src/sys/path/unix.rs b/std/src/sys/path/unix.rs index faa2616a6320d..15530323a198d 100644 --- a/std/src/sys/path/unix.rs +++ b/std/src/sys/path/unix.rs @@ -62,7 +62,7 @@ pub(crate) fn absolute(path: &Path) -> io::Result { } pub(crate) fn is_absolute(path: &Path) -> bool { - if cfg!(any(unix, target_os = "hermit", target_os = "wasi")) { + if cfg!(any(unix, target_os = "hermit", target_os = "wasi", target_os = "motor")) { path.has_root() } else { path.has_root() && path.prefix().is_some() diff --git a/std/src/sys/personality/mod.rs b/std/src/sys/personality/mod.rs index 158e44e1764a3..eabef92244d01 100644 --- a/std/src/sys/personality/mod.rs +++ b/std/src/sys/personality/mod.rs @@ -17,7 +17,7 @@ cfg_select! { target_os = "emscripten" => { mod emcc; } - any(target_env = "msvc", target_family = "wasm") => { + any(target_env = "msvc", target_family = "wasm", target_os = "motor") => { // This is required by the compiler to exist (e.g., it's a lang item), // but it's never actually called by the compiler because // __CxxFrameHandler3 (msvc) / __gxx_wasm_personality_v0 (wasm) is the diff --git a/std/src/sys/process/mod.rs b/std/src/sys/process/mod.rs index a1ed0cd2cdd2d..92e459298fc4c 100644 --- a/std/src/sys/process/mod.rs +++ b/std/src/sys/process/mod.rs @@ -11,6 +11,10 @@ cfg_select! 
     mod uefi;
     use uefi as imp;
 }
+    target_os = "motor" => {
+        mod motor;
+        use motor as imp;
+    }
     _ => {
         mod unsupported;
         use unsupported as imp;
@@ -38,6 +42,7 @@ pub use imp::{
         ))
     ),
     target_os = "windows",
+    target_os = "motor"
 ))]
 pub fn output(cmd: &mut Command) -> crate::io::Result<(ExitStatus, Vec<u8>, Vec<u8>)> {
     use crate::sys::pipe::read2;
@@ -77,5 +82,6 @@ pub fn output(cmd: &mut Command) -> crate::io::Result<(ExitStatus, Vec<u8>, Vec<
         ))
     ),
     target_os = "windows",
+    target_os = "motor"
 )))]
 pub use imp::output;
diff --git a/std/src/sys/process/motor.rs b/std/src/sys/process/motor.rs
new file mode 100644
index 0000000000000..9060902bc3d20
--- /dev/null
+++ b/std/src/sys/process/motor.rs
@@ -0,0 +1,313 @@
+use super::CommandEnvs;
+use super::env::CommandEnv;
+use crate::ffi::OsStr;
+pub use crate::ffi::OsString as EnvKey;
+use crate::num::NonZeroI32;
+use crate::os::fd::{FromRawFd, IntoRawFd};
+use crate::os::motor::ffi::OsStrExt;
+use crate::path::Path;
+use crate::process::StdioPipes;
+use crate::sys::fs::File;
+use crate::sys::map_motor_error;
+use crate::sys::pipe::AnonPipe;
+use crate::sys_common::{AsInner, FromInner};
+use crate::{fmt, io};
+
+pub enum Stdio {
+    Inherit,
+    Null,
+    MakePipe,
+    Fd(crate::sys::fd::FileDesc),
+}
+
+impl Stdio {
+    fn into_rt(self) -> moto_rt::RtFd {
+        match self {
+            Stdio::Inherit => moto_rt::process::STDIO_INHERIT,
+            Stdio::Null => moto_rt::process::STDIO_NULL,
+            Stdio::MakePipe => moto_rt::process::STDIO_MAKE_PIPE,
+            Stdio::Fd(fd) => fd.into_raw_fd(),
+        }
+    }
+
+    fn try_clone(&self) -> io::Result<Self> {
+        match self {
+            Self::Fd(fd) => {
+                Ok(Self::Fd(crate::sys::fd::FileDesc::from_inner(fd.as_inner().try_clone()?)))
+            }
+            Self::Inherit => Ok(Self::Inherit),
+            Self::Null => Ok(Self::Null),
+            Self::MakePipe => Ok(Self::MakePipe),
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct Command {
+    program: String,
+    args: Vec<String>,
+    cwd: Option<String>,
+    stdin: Option<Stdio>,
+    stdout: Option<Stdio>,
+    stderr: Option<Stdio>,
+    env: CommandEnv,
+}
+
+impl Command {
+    pub fn new(program: &OsStr) -> Command {
+        let mut env = CommandEnv::default();
+        env.remove(OsStr::new(moto_rt::process::STDIO_IS_TERMINAL_ENV_KEY));
+
+        Command { program: program.as_str().to_owned(), env, ..Default::default() }
+    }
+
+    pub fn arg(&mut self, arg: &OsStr) {
+        self.args.push(arg.as_str().to_owned())
+    }
+
+    pub fn env_mut(&mut self) -> &mut CommandEnv {
+        &mut self.env
+    }
+
+    pub fn cwd(&mut self, dir: &OsStr) {
+        self.cwd = Some(dir.as_str().to_owned())
+    }
+
+    pub fn stdin(&mut self, stdin: Stdio) {
+        self.stdin = Some(stdin);
+    }
+
+    pub fn stdout(&mut self, stdout: Stdio) {
+        self.stdout = Some(stdout);
+    }
+
+    pub fn stderr(&mut self, stderr: Stdio) {
+        self.stderr = Some(stderr);
+    }
+
+    pub fn get_program(&self) -> &OsStr {
+        OsStr::new(self.program.as_str())
+    }
+
+    pub fn get_args(&self) -> CommandArgs<'_> {
+        let iter = self.args.iter();
+        CommandArgs { iter }
+    }
+
+    pub fn get_envs(&self) -> CommandEnvs<'_> {
+        self.env.iter()
+    }
+
+    pub fn get_current_dir(&self) -> Option<&Path> {
+        self.cwd.as_ref().map(Path::new)
+    }
+
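+    // A hedged sketch (editorial, not part of the original patch) of how
+    // `spawn` below resolves each stream to a `moto_rt::RtFd` via
+    // `Stdio::into_rt`: an explicitly configured stream wins; otherwise
+    // `default` is used, except that stdin falls back to `Stdio::Null`
+    // when `needs_stdin` is false. A hypothetical caller:
+    //
+    //     let mut cmd = Command::new(OsStr::new("/bin/app"));
+    //     cmd.stdout(Stdio::MakePipe);                    // STDIO_MAKE_PIPE
+    //     let (proc, pipes) = cmd.spawn(Stdio::Inherit, false)?;
+    //     // stdin was resolved to Null, so (assuming the runtime reports a
+    //     // negative fd for it) `pipes.stdin` is `None`.
+    //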
+    pub fn spawn(
+        &mut self,
+        default: Stdio,
+        needs_stdin: bool,
+    ) -> io::Result<(Process, StdioPipes)> {
+        let stdin = if let Some(stdin) = self.stdin.as_ref() {
+            stdin.try_clone()?.into_rt()
+        } else if needs_stdin {
+            default.try_clone()?.into_rt()
+        } else {
+            Stdio::Null.into_rt()
+        };
+        let stdout = if let Some(stdout) = self.stdout.as_ref() {
+            stdout.try_clone()?.into_rt()
+        } else {
+            default.try_clone()?.into_rt()
+        };
+        let stderr = if let Some(stderr) = self.stderr.as_ref() {
+            stderr.try_clone()?.into_rt()
+        } else {
+            default.try_clone()?.into_rt()
+        };
+
+        let mut env = Vec::<(String, String)>::new();
+        for (k, v) in self.env.capture() {
+            env.push((k.as_str().to_owned(), v.as_str().to_owned()));
+        }
+
+        let args = moto_rt::process::SpawnArgs {
+            program: self.program.clone(),
+            args: self.args.clone(),
+            env,
+            cwd: self.cwd.clone(),
+            stdin,
+            stdout,
+            stderr,
+        };
+
+        let (handle, stdin, stdout, stderr) =
+            moto_rt::process::spawn(args).map_err(map_motor_error)?;
+
+        Ok((
+            Process { handle },
+            StdioPipes {
+                stdin: if stdin >= 0 { Some(stdin.into()) } else { None },
+                stdout: if stdout >= 0 { Some(stdout.into()) } else { None },
+                stderr: if stderr >= 0 { Some(stderr.into()) } else { None },
+            },
+        ))
+    }
+}
+
+impl From<AnonPipe> for Stdio {
+    fn from(pipe: AnonPipe) -> Stdio {
+        unsafe { Stdio::Fd(crate::sys::fd::FileDesc::from_raw_fd(pipe.into_raw_fd())) }
+    }
+}
+
+impl From<crate::sys::fd::FileDesc> for Stdio {
+    fn from(fd: crate::sys::fd::FileDesc) -> Stdio {
+        Stdio::Fd(fd)
+    }
+}
+
+impl From<File> for Stdio {
+    fn from(_file: File) -> Stdio {
+        panic!("Not implemented")
+    }
+}
+
+impl From<io::Stdout> for Stdio {
+    fn from(_: io::Stdout) -> Stdio {
+        panic!("Not implemented")
+    }
+}
+
+impl From<io::Stderr> for Stdio {
+    fn from(_: io::Stderr) -> Stdio {
+        panic!("Not implemented")
+    }
+}
+
+impl fmt::Debug for Command {
+    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        Ok(())
+    }
+}
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
+pub struct ExitStatus(i32);
+
+impl ExitStatus {
+    pub fn exit_ok(&self) -> Result<(), ExitStatusError> {
+        if self.0 == 0 { Ok(()) } else { Err(ExitStatusError(*self)) }
+    }
+
+    pub fn code(&self) -> Option<i32> {
+        Some(self.0)
+    }
+}
+
+impl fmt::Display for ExitStatus {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "exit code: {}", self.0)
+    }
+}
+#[derive(PartialEq, Eq, Clone, Copy, Debug)]
+pub struct ExitStatusError(ExitStatus);
+
+impl Into<ExitStatus> for ExitStatusError {
+    fn into(self) -> ExitStatus {
+        self.0
+    }
+}
+
+impl ExitStatusError {
+    pub fn code(self) -> Option<NonZeroI32> {
+        NonZeroI32::new(self.0.0)
+    }
+}
+
+#[derive(PartialEq, Eq, Clone, Copy, Debug)]
+pub struct ExitCode(i32);
+
+impl ExitCode {
+    pub const SUCCESS: ExitCode = ExitCode(0);
+    pub const FAILURE: ExitCode = ExitCode(1);
+
+    pub fn as_i32(&self) -> i32 {
+        self.0
+    }
+}
+
+impl From<u8> for ExitCode {
+    fn from(code: u8) -> Self {
+        Self(code as i32)
+    }
+}
+
+pub struct Process {
+    handle: u64,
+}
+
+impl Drop for Process {
+    fn drop(&mut self) {
+        moto_rt::alloc::release_handle(self.handle).unwrap();
+    }
+}
+
+impl Process {
+    pub fn id(&self) -> u32 {
+        0
+    }
+
+    pub fn kill(&mut self) -> io::Result<()> {
+        match moto_rt::process::kill(self.handle) {
+            moto_rt::E_OK => Ok(()),
+            err => Err(map_motor_error(err)),
+        }
+    }
+
+    pub fn wait(&mut self) -> io::Result<ExitStatus> {
+        moto_rt::process::wait(self.handle).map(|c| ExitStatus(c)).map_err(map_motor_error)
+    }
+
+    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
+        match moto_rt::process::try_wait(self.handle) {
+            Ok(s) => Ok(Some(ExitStatus(s))),
+            Err(err) => match err {
+                moto_rt::E_NOT_READY => Ok(None),
+                err => Err(map_motor_error(err)),
+            },
+        }
+    }
+
+    #[allow(unused)]
+    pub fn handle(&self) -> u64 {
+        self.handle
+    }
+}
+
+pub struct CommandArgs<'a> {
+    iter: crate::slice::Iter<'a, String>,
+}
+
+impl<'a> Iterator for CommandArgs<'a> {
+    type Item = &'a OsStr;
+    fn next(&mut self) -> Option<&'a OsStr> {
+        self.iter.next().map(|arg| OsStr::new(arg))
+    }
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+}
+
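+// A hedged sketch (editorial, not part of the original patch) of how a
+// hypothetical caller inside std would drive the `Process` returned by
+// `Command::spawn` above:
+//
+//     let (mut proc, _pipes) = cmd.spawn(Stdio::Inherit, false)?;
+//     match proc.try_wait()? {
+//         Some(status) => println!("exited: {status}"),
+//         None => {
+//             let status = proc.wait()?; // blocks until the child exits
+//             assert!(status.exit_ok().is_ok() || status.code() != Some(0));
+//         }
+//     }
+//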
+impl<'a> ExactSizeIterator for CommandArgs<'a> {
+    fn len(&self) -> usize {
+        self.iter.len()
+    }
+    fn is_empty(&self) -> bool {
+        self.iter.is_empty()
+    }
+}
+
+impl<'a> fmt::Debug for CommandArgs<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_list().entries(self.iter.clone()).finish()
+    }
+}
diff --git a/std/src/sys/random/mod.rs b/std/src/sys/random/mod.rs
index ec81d89a0f3af..91f72d0738790 100644
--- a/std/src/sys/random/mod.rs
+++ b/std/src/sys/random/mod.rs
@@ -62,6 +62,10 @@ cfg_select! {
         mod redox;
         pub use redox::fill_bytes;
     }
+    target_os = "motor" => {
+        mod motor;
+        pub use motor::fill_bytes;
+    }
     all(target_vendor = "fortanix", target_env = "sgx") => {
         mod sgx;
         pub use sgx::fill_bytes;
diff --git a/std/src/sys/random/motor.rs b/std/src/sys/random/motor.rs
new file mode 100644
index 0000000000000..386b3704a91ea
--- /dev/null
+++ b/std/src/sys/random/motor.rs
@@ -0,0 +1,3 @@
+pub fn fill_bytes(bytes: &mut [u8]) {
+    moto_rt::fill_random_bytes(bytes)
+}
diff --git a/std/src/sys/stdio/mod.rs b/std/src/sys/stdio/mod.rs
index 660317e3ea844..d51ea9ad726b5 100644
--- a/std/src/sys/stdio/mod.rs
+++ b/std/src/sys/stdio/mod.rs
@@ -13,6 +13,10 @@ cfg_select! {
         mod sgx;
         pub use sgx::*;
     }
+    target_os = "motor" => {
+        mod motor;
+        pub use motor::*;
+    }
     target_os = "solid_asp3" => {
         mod solid;
         pub use solid::*;
diff --git a/std/src/sys/stdio/motor.rs b/std/src/sys/stdio/motor.rs
new file mode 100644
index 0000000000000..0a44feab723d0
--- /dev/null
+++ b/std/src/sys/stdio/motor.rs
@@ -0,0 +1,232 @@
+use crate::os::fd::{AsFd, AsRawFd, BorrowedFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
+use crate::sys::map_motor_error;
+use crate::sys_common::{AsInner, FromInner, IntoInner};
+use crate::{io, process, sys};
+
+pub const STDIN_BUF_SIZE: usize = crate::sys::io::DEFAULT_BUF_SIZE;
+
+pub struct Stdin {}
+
+impl Stdin {
+    pub const fn new() -> Self {
+        Self {}
+    }
+}
+
+pub struct Stdout {}
+
+impl Stdout {
+    pub const fn new() -> Self {
+        Self {}
+    }
+}
+
+pub struct Stderr {}
+
+impl Stderr {
+    pub const fn new() -> Self {
+        Self {}
+    }
+}
+
+impl crate::sealed::Sealed for Stdin {}
+
+impl crate::io::IsTerminal for Stdin {
+    fn is_terminal(&self) -> bool {
+        moto_rt::fs::is_terminal(moto_rt::FD_STDIN)
+    }
+}
+
+impl io::Read for Stdin {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        moto_rt::fs::read(moto_rt::FD_STDIN, buf).map_err(map_motor_error)
+    }
+}
+
+impl io::Write for Stdout {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        moto_rt::fs::write(moto_rt::FD_STDOUT, buf).map_err(map_motor_error)
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        moto_rt::fs::flush(moto_rt::FD_STDOUT).map_err(map_motor_error)
+    }
+}
+
+impl io::Write for Stderr {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        moto_rt::fs::write(moto_rt::FD_STDERR, buf).map_err(map_motor_error)
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        moto_rt::fs::flush(moto_rt::FD_STDERR).map_err(map_motor_error)
+    }
+}
+
+pub fn panic_output() -> Option<impl io::Write> {
+    Some(Stderr::new())
+}
+
+pub fn is_ebadf(_err: &io::Error) -> bool {
+    true
+}
+
+#[stable(feature = "process_extensions", since = "1.2.0")]
+impl FromRawFd for process::Stdio {
+    #[inline]
+    unsafe fn from_raw_fd(fd: RawFd) -> process::Stdio {
+        let fd = unsafe { sys::fd::FileDesc::from_raw_fd(fd) };
+        let io = sys::process::Stdio::Fd(fd);
+        process::Stdio::from_inner(io)
+    }
+}
+
+#[stable(feature = "io_safety", since = "1.63.0")]
+impl From<OwnedFd> for process::Stdio {
+    /// Takes ownership of a file descriptor and returns a [`Stdio`](process::Stdio)
+    /// that can attach a stream to it.
+    #[inline]
+    fn from(fd: OwnedFd) -> process::Stdio {
+        let fd = sys::fd::FileDesc::from_inner(fd);
+        let io = sys::process::Stdio::Fd(fd);
+        process::Stdio::from_inner(io)
+    }
+}
+
+#[stable(feature = "process_extensions", since = "1.2.0")]
+impl AsRawFd for process::ChildStdin {
+    #[inline]
+    fn as_raw_fd(&self) -> RawFd {
+        self.as_inner().as_raw_fd()
+    }
+}
+
+#[stable(feature = "process_extensions", since = "1.2.0")]
+impl AsRawFd for process::ChildStdout {
+    #[inline]
+    fn as_raw_fd(&self) -> RawFd {
+        self.as_inner().as_raw_fd()
+    }
+}
+
+#[stable(feature = "process_extensions", since = "1.2.0")]
+impl AsRawFd for process::ChildStderr {
+    #[inline]
+    fn as_raw_fd(&self) -> RawFd {
+        self.as_inner().as_raw_fd()
+    }
+}
+
+#[stable(feature = "into_raw_os", since = "1.4.0")]
+impl IntoRawFd for process::ChildStdin {
+    #[inline]
+    fn into_raw_fd(self) -> RawFd {
+        self.into_inner().into_raw_fd()
+    }
+}
+
+#[stable(feature = "into_raw_os", since = "1.4.0")]
+impl IntoRawFd for process::ChildStdout {
+    #[inline]
+    fn into_raw_fd(self) -> RawFd {
+        self.into_inner().into_raw_fd()
+    }
+}
+
+#[stable(feature = "into_raw_os", since = "1.4.0")]
+impl IntoRawFd for process::ChildStderr {
+    #[inline]
+    fn into_raw_fd(self) -> RawFd {
+        self.into_inner().into_raw_fd()
+    }
+}
+
+#[stable(feature = "io_safety", since = "1.63.0")]
+impl AsFd for crate::process::ChildStdin {
+    #[inline]
+    fn as_fd(&self) -> BorrowedFd<'_> {
+        self.as_inner().as_fd()
+    }
+}
+
+#[stable(feature = "io_safety", since = "1.63.0")]
+impl From<crate::process::ChildStdin> for OwnedFd {
+    /// Takes ownership of a [`ChildStdin`](crate::process::ChildStdin)'s file descriptor.
+    #[inline]
+    fn from(child_stdin: crate::process::ChildStdin) -> OwnedFd {
+        child_stdin.into_inner().into_inner()
+    }
+}
+
+/// Creates a `ChildStdin` from the provided `OwnedFd`.
+///
+/// The provided file descriptor must point to a pipe
+/// with the `CLOEXEC` flag set.
+#[stable(feature = "child_stream_from_fd", since = "1.74.0")]
+impl From<OwnedFd> for process::ChildStdin {
+    #[inline]
+    fn from(fd: OwnedFd) -> process::ChildStdin {
+        let pipe = sys::pipe::AnonPipe::from_inner(fd);
+        process::ChildStdin::from_inner(pipe)
+    }
+}
+
+#[stable(feature = "io_safety", since = "1.63.0")]
+impl AsFd for crate::process::ChildStdout {
+    #[inline]
+    fn as_fd(&self) -> BorrowedFd<'_> {
+        self.as_inner().as_fd()
+    }
+}
+
+#[stable(feature = "io_safety", since = "1.63.0")]
+impl From<crate::process::ChildStdout> for OwnedFd {
+    /// Takes ownership of a [`ChildStdout`](crate::process::ChildStdout)'s file descriptor.
+    #[inline]
+    fn from(child_stdout: crate::process::ChildStdout) -> OwnedFd {
+        child_stdout.into_inner().into_inner()
+    }
+}
+
+/// Creates a `ChildStdout` from the provided `OwnedFd`.
+///
+/// The provided file descriptor must point to a pipe
+/// with the `CLOEXEC` flag set.
+#[stable(feature = "child_stream_from_fd", since = "1.74.0")]
+impl From<OwnedFd> for process::ChildStdout {
+    #[inline]
+    fn from(fd: OwnedFd) -> process::ChildStdout {
+        let pipe = sys::pipe::AnonPipe::from_inner(fd);
+        process::ChildStdout::from_inner(pipe)
+    }
+}
+
+#[stable(feature = "io_safety", since = "1.63.0")]
+impl AsFd for crate::process::ChildStderr {
+    #[inline]
+    fn as_fd(&self) -> BorrowedFd<'_> {
+        self.as_inner().as_fd()
+    }
+}
+
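+// A hedged sketch (editorial, not part of the original patch): together,
+// the `From` impls in this file let a hypothetical caller move a child
+// stream's descriptor in and out of `OwnedFd`:
+//
+//     let mut child = std::process::Command::new("app")
+//         .stdin(std::process::Stdio::piped())
+//         .spawn()?;
+//     let fd: OwnedFd = child.stdin.take().unwrap().into(); // ChildStdin -> OwnedFd
+//     let stdin: std::process::ChildStdin = fd.into();      // OwnedFd -> ChildStdin
+//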
+#[stable(feature = "io_safety", since = "1.63.0")]
+impl From<crate::process::ChildStderr> for OwnedFd {
+    /// Takes ownership of a [`ChildStderr`](crate::process::ChildStderr)'s file descriptor.
+    #[inline]
+    fn from(child_stderr: crate::process::ChildStderr) -> OwnedFd {
+        child_stderr.into_inner().into_inner()
+    }
+}
+
+/// Creates a `ChildStderr` from the provided `OwnedFd`.
+///
+/// The provided file descriptor must point to a pipe
+/// with the `CLOEXEC` flag set.
+#[stable(feature = "child_stream_from_fd", since = "1.74.0")]
+impl From<OwnedFd> for process::ChildStderr {
+    #[inline]
+    fn from(fd: OwnedFd) -> process::ChildStderr {
+        let pipe = sys::pipe::AnonPipe::from_inner(fd);
+        process::ChildStderr::from_inner(pipe)
+    }
+}
diff --git a/std/src/sys/sync/condvar/mod.rs b/std/src/sys/sync/condvar/mod.rs
index cb67d273759dd..83cf0ae629851 100644
--- a/std/src/sys/sync/condvar/mod.rs
+++ b/std/src/sys/sync/condvar/mod.rs
@@ -6,6 +6,7 @@ cfg_select! {
         target_os = "freebsd",
         target_os = "openbsd",
         target_os = "dragonfly",
+        target_os = "motor",
         target_os = "fuchsia",
         all(target_family = "wasm", target_feature = "atomics"),
         target_os = "hermit",
diff --git a/std/src/sys/sync/mutex/mod.rs b/std/src/sys/sync/mutex/mod.rs
index c885b0eabae2d..e3d6ad1129c83 100644
--- a/std/src/sys/sync/mutex/mod.rs
+++ b/std/src/sys/sync/mutex/mod.rs
@@ -5,6 +5,7 @@ cfg_select! {
         target_os = "android",
         target_os = "freebsd",
         target_os = "openbsd",
+        target_os = "motor",
         target_os = "dragonfly",
         all(target_family = "wasm", target_feature = "atomics"),
         target_os = "hermit",
diff --git a/std/src/sys/sync/once/mod.rs b/std/src/sys/sync/once/mod.rs
index 8adeb1f259d72..aeea884b9f617 100644
--- a/std/src/sys/sync/once/mod.rs
+++ b/std/src/sys/sync/once/mod.rs
@@ -14,6 +14,7 @@ cfg_select! {
         target_os = "android",
         all(target_arch = "wasm32", target_feature = "atomics"),
         target_os = "freebsd",
+        target_os = "motor",
         target_os = "openbsd",
         target_os = "dragonfly",
         target_os = "fuchsia",
diff --git a/std/src/sys/sync/rwlock/mod.rs b/std/src/sys/sync/rwlock/mod.rs
index 82f1dd18dee49..ab5715bf2de33 100644
--- a/std/src/sys/sync/rwlock/mod.rs
+++ b/std/src/sys/sync/rwlock/mod.rs
@@ -9,6 +9,7 @@ cfg_select! {
         target_os = "fuchsia",
         all(target_family = "wasm", target_feature = "atomics"),
         target_os = "hermit",
+        target_os = "motor",
     ) => {
         mod futex;
         pub use futex::RwLock;
diff --git a/std/src/sys/sync/thread_parking/mod.rs b/std/src/sys/sync/thread_parking/mod.rs
index b9fb27b4eef2d..e8a9dc884f816 100644
--- a/std/src/sys/sync/thread_parking/mod.rs
+++ b/std/src/sys/sync/thread_parking/mod.rs
@@ -8,6 +8,7 @@ cfg_select! {
         target_os = "openbsd",
         target_os = "dragonfly",
         target_os = "fuchsia",
+        target_os = "motor",
         target_os = "hermit",
     ) => {
         mod futex;
diff --git a/std/src/sys/thread/mod.rs b/std/src/sys/thread/mod.rs
index a20b2a3ddd8ce..b98be62be0ad2 100644
--- a/std/src/sys/thread/mod.rs
+++ b/std/src/sys/thread/mod.rs
@@ -6,6 +6,10 @@ cfg_select! {
     mod unsupported;
     pub use unsupported::{current_os_id, set_name};
 }
+    target_os = "motor" => {
+        mod motor;
+        pub use motor::*;
+    }
     all(target_vendor = "fortanix", target_env = "sgx") => {
         mod sgx;
         pub use sgx::{Thread, current_os_id, sleep, yield_now, DEFAULT_MIN_STACK_SIZE};
diff --git a/std/src/sys/thread/motor.rs b/std/src/sys/thread/motor.rs
new file mode 100644
index 0000000000000..0457d8818f326
--- /dev/null
+++ b/std/src/sys/thread/motor.rs
@@ -0,0 +1,63 @@
+use crate::ffi::CStr;
+use crate::io;
+use crate::num::NonZeroUsize;
+use crate::sys::map_motor_error;
+use crate::time::Duration;
+
+pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 256;
+
+pub struct Thread {
+    sys_thread: moto_rt::thread::ThreadHandle,
+}
+
+unsafe impl Send for Thread {}
+unsafe impl Sync for Thread {}
+
+impl Thread {
+    pub unsafe fn new(
+        stack: usize,
+        _name: Option<&str>,
+        p: Box<dyn FnOnce()>,
+    ) -> io::Result<Thread> {
+        extern "C" fn __moto_rt_thread_fn(thread_arg: u64) {
+            unsafe {
+                Box::from_raw(
+                    core::ptr::with_exposed_provenance::<Box<dyn FnOnce()>>(thread_arg as usize)
+                        .cast_mut(),
+                )();
+            }
+        }
+
+        let thread_arg = Box::into_raw(Box::new(p)).expose_provenance() as u64;
+        let sys_thread = moto_rt::thread::spawn(__moto_rt_thread_fn, stack, thread_arg)
+            .map_err(map_motor_error)?;
+        Ok(Self { sys_thread })
+    }
+
+    pub fn join(self) {
+        assert!(moto_rt::thread::join(self.sys_thread) == moto_rt::E_OK)
+    }
+}
+
+pub fn set_name(name: &CStr) {
+    let bytes = name.to_bytes();
+    if let Ok(s) = core::str::from_utf8(bytes) {
+        let _ = moto_rt::thread::set_name(s);
+    }
+}
+
+pub fn current_os_id() -> Option<u64> {
+    None
+}
+
+pub fn available_parallelism() -> io::Result<NonZeroUsize> {
+    Ok(unsafe { NonZeroUsize::new_unchecked(moto_rt::num_cpus()) })
+}
+
+pub fn yield_now() {
+    moto_rt::thread::yield_now()
+}
+
+pub fn sleep(dur: Duration) {
+    moto_rt::thread::sleep_until(moto_rt::time::Instant::now() + dur)
+}
diff --git a/std/src/sys/thread_local/mod.rs b/std/src/sys/thread_local/mod.rs
index f7f051b1addc4..e88011aa22dad 100644
--- a/std/src/sys/thread_local/mod.rs
+++ b/std/src/sys/thread_local/mod.rs
@@ -187,6 +187,14 @@ pub(crate) mod key {
         pub(super) use xous::{Key, get, set};
         use xous::{create, destroy};
     }
+    target_os = "motor" => {
+        mod racy;
+        #[cfg(test)]
+        mod tests;
+        pub(super) use racy::LazyKey;
+        pub(super) use moto_rt::tls::{Key, get, set};
+        use moto_rt::tls::{create, destroy};
+    }
     _ => {}
   }
 }
From 0c4988d58bfdb4baab6defdd1253777abd62c6a1 Mon Sep 17 00:00:00 2001
From: yukang
Date: Thu, 9 Oct 2025 01:25:32 +0800
Subject: [PATCH 056/358] referring to repeat_n in std::iter::repeat

---
 core/src/iter/sources/repeat.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/core/src/iter/sources/repeat.rs b/core/src/iter/sources/repeat.rs
index 4bcd5b16aea6a..7fcd6f8963a48 100644
--- a/core/src/iter/sources/repeat.rs
+++ b/core/src/iter/sources/repeat.rs
@@ -8,6 +8,9 @@ use crate::num::NonZero;
 /// Infinite iterators like `repeat()` are often used with adapters like
 /// [`Iterator::take()`], in order to make them finite.
 ///
+/// If you know the number of repetitions in advance, consider using [`repeat_n()`]
+/// instead, as it is more efficient and conveys the intent more clearly.
+///
 /// Use [`str::repeat()`] instead of this function if you just want to repeat
 /// a char/string `n` times.
 ///
 /// If you have something that you need to repeat along with cloning it,
 /// or if you do not want to keep the repeated element in memory, you can
 /// instead use the [`repeat_with()`] function.
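+///
+/// A hypothetical extra doc example (editorial sketch, not in the original
+/// patch) contrasting the two spellings:
+///
+/// ```
+/// use std::iter;
+///
+/// // With a known count, `repeat_n` states the intent directly...
+/// let a: Vec<i32> = iter::repeat_n(7, 3).collect();
+/// // ...and is equivalent to bounding `repeat` with `take`.
+/// let b: Vec<i32> = iter::repeat(7).take(3).collect();
+/// assert_eq!(a, b);
+/// ```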
 ///
+/// [`repeat_n()`]: crate::iter::repeat_n
 /// [`repeat_with()`]: crate::iter::repeat_with
 /// [`str::repeat()`]: ../../std/primitive.str.html#method.repeat
 ///
From c90a5f09621dda8602a1df22bc4d95f996a3d2dd Mon Sep 17 00:00:00 2001
From: The 8472
Date: Fri, 3 Oct 2025 13:43:11 +0200
Subject: [PATCH 057/358] specialize slice::fill to use memset when possible

LLVM generally can do this on its own, but it helps miri and other
backends.
---
 core/src/slice/specialize.rs | 47 +++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/core/src/slice/specialize.rs b/core/src/slice/specialize.rs
index 80eb590587f99..17436395fee69 100644
--- a/core/src/slice/specialize.rs
+++ b/core/src/slice/specialize.rs
@@ -15,9 +15,54 @@ impl<T: Clone> SpecFill<T> for [T] {
 }
 
 impl<T: Copy> SpecFill<T> for [T] {
-    fn spec_fill(&mut self, value: T) {
+    default fn spec_fill(&mut self, value: T) {
         for item in self.iter_mut() {
             *item = value;
         }
     }
 }
+
+impl SpecFill<u8> for [u8] {
+    fn spec_fill(&mut self, value: u8) {
+        // SAFETY: The pointer is derived from a reference, so it's writable.
+        unsafe {
+            crate::intrinsics::write_bytes(self.as_mut_ptr(), value, self.len());
+        }
+    }
+}
+
+impl SpecFill<i8> for [i8] {
+    fn spec_fill(&mut self, value: i8) {
+        // SAFETY: The pointer is derived from a reference, so it's writable.
+        unsafe {
+            crate::intrinsics::write_bytes(self.as_mut_ptr(), value.cast_unsigned(), self.len());
+        }
+    }
+}
+
+macro spec_fill_int {
+    ($($type:ty)*) => {$(
+        impl SpecFill<$type> for [$type] {
+            #[inline]
+            fn spec_fill(&mut self, value: $type) {
+                // We always take this fastpath in Miri for long slices as the manual `for`
+                // loop can be prohibitively slow.
+                if (cfg!(miri) && self.len() > 32) || crate::intrinsics::is_val_statically_known(value) {
+                    let bytes = value.to_ne_bytes();
+                    if value == <$type>::from_ne_bytes([bytes[0]; size_of::<$type>()]) {
+                        // SAFETY: The pointer is derived from a reference, so it's writable.
+                        unsafe {
+                            crate::intrinsics::write_bytes(self.as_mut_ptr(), bytes[0], self.len());
+                        }
+                        return;
+                    }
+                }
+                for item in self.iter_mut() {
+                    *item = value;
+                }
+            }
+        }
+    )*}
+}
+
+spec_fill_int! { u16 i16 u32 i32 u64 i64 u128 i128 usize isize }
From 0c66296dfdc5fe32fba7ae286ad97c1a3644cb1a Mon Sep 17 00:00:00 2001
From: cyrgani
Date: Wed, 8 Oct 2025 13:49:32 +0200
Subject: [PATCH 058/358] run zero-size assertion in `const {}`

---
 proc_macro/src/bridge/selfless_reify.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/proc_macro/src/bridge/selfless_reify.rs b/proc_macro/src/bridge/selfless_reify.rs
index b06434a5ffee2..a53550e0b9e0c 100644
--- a/proc_macro/src/bridge/selfless_reify.rs
+++ b/proc_macro/src/bridge/selfless_reify.rs
@@ -50,7 +50,7 @@ macro_rules! define_reify_functions {
         >(f: F) -> $(extern $abi)? fn($($arg_ty),*) -> $ret_ty {
             // FIXME(eddyb) describe the `F` type (e.g. via `type_name::<F>`) once panic
             // formatting becomes possible in `const fn`.
-            assert!(size_of::<F>() == 0, "selfless_reify: closure must be zero-sized");
+            const { assert!(size_of::<F>() == 0, "selfless_reify: closure must be zero-sized"); }
 
             $(extern $abi)? fn wrapper<
                 $($($param,)*)?
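A minimal, hypothetical sketch (editorial, not part of the patch series) of what the `const { ... }` change above buys: the zero-size check is now evaluated at compile time for each monomorphization, so a non-zero-sized closure becomes a build error instead of a runtime panic. `assert_zero_sized` is an illustrative stand-in, not the real bridge function.

    use std::mem::size_of;

    fn assert_zero_sized<F>(_f: &F) {
        // Evaluated once per `F` at compile time.
        const { assert!(size_of::<F>() == 0, "closure must be zero-sized") }
    }

    fn main() {
        let non_capturing = || 42;
        assert_zero_sized(&non_capturing); // ok: captures nothing, so `F` is a ZST
        // let x = 5;
        // let capturing = move || x;
        // assert_zero_sized(&capturing); // would fail to compile: `F` is not zero-sized
    }
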
From 0282f2104f37ddc1df9e0042fce4e363b3566e57 Mon Sep 17 00:00:00 2001
From: cyrgani
Date: Wed, 8 Oct 2025 14:33:31 +0200
Subject: [PATCH 059/358] inline constants in generated `enum` `Encode` impls

---
 proc_macro/src/bridge/rpc.rs | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/proc_macro/src/bridge/rpc.rs b/proc_macro/src/bridge/rpc.rs
index 7f4f5fc3a97d5..6e5afe282d683 100644
--- a/proc_macro/src/bridge/rpc.rs
+++ b/proc_macro/src/bridge/rpc.rs
@@ -58,16 +58,11 @@ macro_rules! rpc_encode_decode {
             fn encode(self, w: &mut Writer, s: &mut S) {
                 // HACK(eddyb): `Tag` enum duplicated between the
                 // two impls as there's no other place to stash it.
-                #[allow(non_upper_case_globals)]
-                mod tag {
-                    #[repr(u8)] enum Tag { $($variant),* }
-
-                    $(pub(crate) const $variant: u8 = Tag::$variant as u8;)*
-                }
+                #[repr(u8)] enum Tag { $($variant),* }
 
                 match self {
                     $($name::$variant $(($field))* => {
-                        tag::$variant.encode(w, s);
+                        (Tag::$variant as u8).encode(w, s);
                         $($field.encode(w, s);)*
                     })*
                 }
From fa544224b9197130098c6d25d0a166468691992c Mon Sep 17 00:00:00 2001
From: Evan Jones
Date: Wed, 8 Oct 2025 15:51:21 -0400
Subject: [PATCH 060/358] std::thread spawn: Docs: Link to Builder::spawn; Make same.

Replace "use this API instead" with a link to Builder::spawn. Edit the
paragraph to make it slightly clearer. The Scope::spawn method already
included a link. Make the docs for the two nearly the same.
---
 std/src/thread/mod.rs    | 5 ++---
 std/src/thread/scoped.rs | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/std/src/thread/mod.rs b/std/src/thread/mod.rs
index fd7cce3f97db8..78c85c0af6449 100644
--- a/std/src/thread/mod.rs
+++ b/std/src/thread/mod.rs
@@ -620,9 +620,8 @@ impl Builder {
 /// (It is the responsibility of the program to either eventually join threads it
 /// creates or detach them; otherwise, a resource leak will result.)
 ///
-/// This call will create a thread using default parameters of [`Builder`], if you
-/// want to specify the stack size or the name of the thread, use this API
-/// instead.
+/// This function creates a thread with the default parameters. To specify the
+/// new thread's stack size or the name, use [`Builder::spawn`].
 ///
 /// As you can see in the signature of `spawn` there are two constraints on
 /// both the closure given to `spawn` and its return value, let's explain them:
diff --git a/std/src/thread/scoped.rs b/std/src/thread/scoped.rs
index a4c0ca5417d00..2368ce4988d89 100644
--- a/std/src/thread/scoped.rs
+++ b/std/src/thread/scoped.rs
@@ -181,9 +181,8 @@ impl<'scope, 'env> Scope<'scope, 'env> {
     /// end of the scope. In that case, if the spawned thread panics, [`scope`] will
     /// panic after all threads are joined.
     ///
-    /// This call will create a thread using default parameters of [`Builder`].
-    /// If you want to specify the stack size or the name of the thread, use
-    /// [`Builder::spawn_scoped`] instead.
+    /// This function creates a thread with the default parameters. To specify the
+    /// new thread's stack size or the name, use [`Builder::spawn_scoped`].
 ///
 /// # Panics
 ///
From 6d929154d7fac6a86409b918bb77ee8a954729cf Mon Sep 17 00:00:00 2001
From: yukang
Date: Wed, 8 Oct 2025 16:39:53 +0800
Subject: [PATCH 061/358] Implement fs api set_times and set_times_nofollow

---
 std/src/fs.rs                 |  74 ++++++++++
 std/src/fs/tests.rs           | 219 ++++++++++++++++++++++++++++
 std/src/sys/fs/hermit.rs      |   8 ++
 std/src/sys/fs/mod.rs         |   8 ++
 std/src/sys/fs/solid.rs       |  11 ++
 std/src/sys/fs/uefi.rs        |   8 ++
 std/src/sys/fs/unix.rs        | 127 ++++++++++++++++
 std/src/sys/fs/unsupported.rs |   8 ++
 std/src/sys/fs/vexos.rs       |   8 ++
 std/src/sys/fs/wasi.rs        |  12 ++
 std/src/sys/fs/windows.rs     |  17 +++
 11 files changed, 500 insertions(+)

diff --git a/std/src/fs.rs b/std/src/fs.rs
index 28b2c7173d321..e97190e69d68f 100644
--- a/std/src/fs.rs
+++ b/std/src/fs.rs
@@ -387,6 +387,80 @@ pub fn write<P: AsRef<Path>, C: AsRef<[u8]>>(path: P, contents: C) -> io::Result<()>
     inner(path.as_ref(), contents.as_ref())
 }
 
+/// Changes the timestamps of the file or directory at the specified path.
+///
+/// This function will attempt to set the access and modification times
+/// to the times specified. If the path refers to a symbolic link, this function
+/// will follow the link and change the timestamps of the target file.
+///
+/// # Platform-specific behavior
+///
+/// This function currently corresponds to the `utimensat` function on Unix platforms
+/// and the `SetFileTime` function on Windows.
+///
+/// # Errors
+///
+/// This function will return an error if the user lacks permission to change timestamps on the
+/// target file or symlink. It may also return an error if the OS does not support it.
+///
+/// # Examples
+///
+/// ```no_run
+/// #![feature(fs_set_times)]
+/// use std::fs::{self, FileTimes};
+/// use std::time::SystemTime;
+///
+/// fn main() -> std::io::Result<()> {
+///     let now = SystemTime::now();
+///     let times = FileTimes::new()
+///         .set_accessed(now)
+///         .set_modified(now);
+///     fs::set_times("foo.txt", times)?;
+///     Ok(())
+/// }
+/// ```
+#[unstable(feature = "fs_set_times", issue = "147455")]
+pub fn set_times<P: AsRef<Path>>(path: P, times: FileTimes) -> io::Result<()> {
+    fs_imp::set_times(path.as_ref(), times.0)
+}
+
+/// Changes the timestamps of the file or symlink at the specified path.
+///
+/// This function will attempt to set the access and modification times
+/// to the times specified. Unlike `set_times`, if the path refers to a symbolic link,
+/// this function will change the timestamps of the symlink itself, not the target file.
+///
+/// # Platform-specific behavior
+///
+/// This function currently corresponds to the `utimensat` function with `AT_SYMLINK_NOFOLLOW`
+/// on Unix platforms and the `SetFileTime` function on Windows after opening the symlink.
+///
+/// # Errors
+///
+/// This function will return an error if the user lacks permission to change timestamps on the
+/// target file or symlink. It may also return an error if the OS does not support it.
+///
+/// # Examples
+///
+/// ```no_run
+/// #![feature(fs_set_times)]
+/// use std::fs::{self, FileTimes};
+/// use std::time::SystemTime;
+///
+/// fn main() -> std::io::Result<()> {
+///     let now = SystemTime::now();
+///     let times = FileTimes::new()
+///         .set_accessed(now)
+///         .set_modified(now);
+///     fs::set_times_nofollow("symlink.txt", times)?;
+///     Ok(())
+/// }
+/// ```
+#[unstable(feature = "fs_set_times", issue = "147455")]
+pub fn set_times_nofollow<P: AsRef<Path>>(path: P, times: FileTimes) -> io::Result<()> {
+    fs_imp::set_times_nofollow(path.as_ref(), times.0)
+}
+
 #[stable(feature = "file_lock", since = "1.89.0")]
 impl error::Error for TryLockError {}
diff --git a/std/src/fs/tests.rs b/std/src/fs/tests.rs
index f8dfb0d633400..4d67ba9248998 100644
--- a/std/src/fs/tests.rs
+++ b/std/src/fs/tests.rs
@@ -2226,3 +2226,222 @@ fn test_open_options_invalid_combinations() {
     assert_eq!(err.kind(), ErrorKind::InvalidInput);
     assert_eq!(err.to_string(), "must specify at least one of read, write, or append access");
 }
+
+#[test]
+fn test_fs_set_times() {
+    #[cfg(target_vendor = "apple")]
+    use crate::os::darwin::fs::FileTimesExt;
+    #[cfg(windows)]
+    use crate::os::windows::fs::FileTimesExt;
+
+    let tmp = tmpdir();
+    let path = tmp.join("foo");
+    File::create(&path).unwrap();
+
+    let mut times = FileTimes::new();
+    let accessed = SystemTime::UNIX_EPOCH + Duration::from_secs(12345);
+    let modified = SystemTime::UNIX_EPOCH + Duration::from_secs(54321);
+    times = times.set_accessed(accessed).set_modified(modified);
+
+    #[cfg(any(windows, target_vendor = "apple"))]
+    let created = SystemTime::UNIX_EPOCH + Duration::from_secs(32123);
+    #[cfg(any(windows, target_vendor = "apple"))]
+    {
+        times = times.set_created(created);
+    }
+
+    match fs::set_times(&path, times) {
+        // Allow unsupported errors on platforms which don't support setting times.
+        #[cfg(not(any(
+            windows,
+            all(
+                unix,
+                not(any(
+                    target_os = "android",
+                    target_os = "redox",
+                    target_os = "espidf",
+                    target_os = "horizon"
+                ))
+            )
+        )))]
+        Err(e) if e.kind() == ErrorKind::Unsupported => return,
+        Err(e) => panic!("error setting file times: {e:?}"),
+        Ok(_) => {}
+    }
+
+    let metadata = fs::metadata(&path).unwrap();
+    assert_eq!(metadata.accessed().unwrap(), accessed);
+    assert_eq!(metadata.modified().unwrap(), modified);
+    #[cfg(any(windows, target_vendor = "apple"))]
+    {
+        assert_eq!(metadata.created().unwrap(), created);
+    }
+}
+
+#[test]
+fn test_fs_set_times_follows_symlink() {
+    #[cfg(target_vendor = "apple")]
+    use crate::os::darwin::fs::FileTimesExt;
+    #[cfg(windows)]
+    use crate::os::windows::fs::FileTimesExt;
+
+    let tmp = tmpdir();
+
+    // Create a target file
+    let target = tmp.join("target");
+    File::create(&target).unwrap();
+
+    // Create a symlink to the target
+    #[cfg(unix)]
+    let link = tmp.join("link");
+    #[cfg(unix)]
+    crate::os::unix::fs::symlink(&target, &link).unwrap();
+
+    #[cfg(windows)]
+    let link = tmp.join("link.txt");
+    #[cfg(windows)]
+    crate::os::windows::fs::symlink_file(&target, &link).unwrap();
+
+    // Get the symlink's own modified time BEFORE calling set_times (to compare later)
+    // We don't check accessed time because reading metadata may update atime on some platforms.
+ let link_metadata_before = fs::symlink_metadata(&link).unwrap(); + let link_modified_before = link_metadata_before.modified().unwrap(); + + let mut times = FileTimes::new(); + let accessed = SystemTime::UNIX_EPOCH + Duration::from_secs(12345); + let modified = SystemTime::UNIX_EPOCH + Duration::from_secs(54321); + times = times.set_accessed(accessed).set_modified(modified); + + #[cfg(any(windows, target_vendor = "apple"))] + let created = SystemTime::UNIX_EPOCH + Duration::from_secs(32123); + #[cfg(any(windows, target_vendor = "apple"))] + { + times = times.set_created(created); + } + + // Call fs::set_times on the symlink - it should follow the link and modify the target + match fs::set_times(&link, times) { + // Allow unsupported errors on platforms which don't support setting times. + #[cfg(not(any( + windows, + all( + unix, + not(any( + target_os = "android", + target_os = "redox", + target_os = "espidf", + target_os = "horizon" + )) + ) + )))] + Err(e) if e.kind() == ErrorKind::Unsupported => return, + Err(e) => panic!("error setting file times through symlink: {e:?}"), + Ok(_) => {} + } + + // Verify that the TARGET file's times were changed (following the symlink) + let target_metadata = fs::metadata(&target).unwrap(); + assert_eq!( + target_metadata.accessed().unwrap(), + accessed, + "target file accessed time should match" + ); + assert_eq!( + target_metadata.modified().unwrap(), + modified, + "target file modified time should match" + ); + #[cfg(any(windows, target_vendor = "apple"))] + { + assert_eq!( + target_metadata.created().unwrap(), + created, + "target file created time should match" + ); + } + + // Also verify through the symlink (fs::metadata follows symlinks) + let link_followed_metadata = fs::metadata(&link).unwrap(); + assert_eq!(link_followed_metadata.accessed().unwrap(), accessed); + assert_eq!(link_followed_metadata.modified().unwrap(), modified); + + // Verify that the SYMLINK ITSELF was NOT modified + // Note: We only check modified time, not accessed time, because reading the symlink + // metadata may update its atime on some platforms (e.g., Linux). + let link_metadata_after = fs::symlink_metadata(&link).unwrap(); + assert_eq!( + link_metadata_after.modified().unwrap(), + link_modified_before, + "symlink's own modified time should not change" + ); +} + +#[test] +fn test_fs_set_times_nofollow() { + #[cfg(target_vendor = "apple")] + use crate::os::darwin::fs::FileTimesExt; + #[cfg(windows)] + use crate::os::windows::fs::FileTimesExt; + + let tmp = tmpdir(); + + // Create a target file and a symlink to it + let target = tmp.join("target"); + File::create(&target).unwrap(); + + #[cfg(unix)] + let link = tmp.join("link"); + #[cfg(unix)] + crate::os::unix::fs::symlink(&target, &link).unwrap(); + + #[cfg(windows)] + let link = tmp.join("link.txt"); + #[cfg(windows)] + crate::os::windows::fs::symlink_file(&target, &link).unwrap(); + + let mut times = FileTimes::new(); + let accessed = SystemTime::UNIX_EPOCH + Duration::from_secs(11111); + let modified = SystemTime::UNIX_EPOCH + Duration::from_secs(22222); + times = times.set_accessed(accessed).set_modified(modified); + + #[cfg(any(windows, target_vendor = "apple"))] + let created = SystemTime::UNIX_EPOCH + Duration::from_secs(33333); + #[cfg(any(windows, target_vendor = "apple"))] + { + times = times.set_created(created); + } + + // Set times on the symlink itself (not following it) + match fs::set_times_nofollow(&link, times) { + // Allow unsupported errors on platforms which don't support setting times. 
+ #[cfg(not(any( + windows, + all( + unix, + not(any( + target_os = "android", + target_os = "redox", + target_os = "espidf", + target_os = "horizon" + )) + ) + )))] + Err(e) if e.kind() == ErrorKind::Unsupported => return, + Err(e) => panic!("error setting symlink times: {e:?}"), + Ok(_) => {} + } + + // Read symlink metadata (without following) + let metadata = fs::symlink_metadata(&link).unwrap(); + assert_eq!(metadata.accessed().unwrap(), accessed); + assert_eq!(metadata.modified().unwrap(), modified); + #[cfg(any(windows, target_vendor = "apple"))] + { + assert_eq!(metadata.created().unwrap(), created); + } + + // Verify that the target file's times were NOT changed + let target_metadata = fs::metadata(&target).unwrap(); + assert_ne!(target_metadata.accessed().unwrap(), accessed); + assert_ne!(target_metadata.modified().unwrap(), modified); +} diff --git a/std/src/sys/fs/hermit.rs b/std/src/sys/fs/hermit.rs index 175d919c289dd..21235bcfbd8c5 100644 --- a/std/src/sys/fs/hermit.rs +++ b/std/src/sys/fs/hermit.rs @@ -566,6 +566,14 @@ pub fn set_perm(_p: &Path, _perm: FilePermissions) -> io::Result<()> { Err(Error::from_raw_os_error(22)) } +pub fn set_times(_p: &Path, _times: FileTimes) -> io::Result<()> { + Err(Error::from_raw_os_error(22)) +} + +pub fn set_times_nofollow(_p: &Path, _times: FileTimes) -> io::Result<()> { + Err(Error::from_raw_os_error(22)) +} + pub fn rmdir(path: &Path) -> io::Result<()> { run_path_with_cstr(path, &|path| cvt(unsafe { hermit_abi::rmdir(path.as_ptr()) }).map(|_| ())) } diff --git a/std/src/sys/fs/mod.rs b/std/src/sys/fs/mod.rs index 64f5a6b36d3db..b498f9cb7ea72 100644 --- a/std/src/sys/fs/mod.rs +++ b/std/src/sys/fs/mod.rs @@ -161,3 +161,11 @@ pub fn exists(path: &Path) -> io::Result { #[cfg(windows)] with_native_path(path, &imp::exists) } + +pub fn set_times(path: &Path, times: FileTimes) -> io::Result<()> { + with_native_path(path, &|path| imp::set_times(path, times.clone())) +} + +pub fn set_times_nofollow(path: &Path, times: FileTimes) -> io::Result<()> { + with_native_path(path, &|path| imp::set_times_nofollow(path, times.clone())) +} diff --git a/std/src/sys/fs/solid.rs b/std/src/sys/fs/solid.rs index 808a95829114e..39bd9b3cdd70b 100644 --- a/std/src/sys/fs/solid.rs +++ b/std/src/sys/fs/solid.rs @@ -538,6 +538,17 @@ pub fn set_perm(p: &Path, perm: FilePermissions) -> io::Result<()> { Ok(()) } +pub fn set_times(_p: &Path, _times: FileTimes) -> io::Result<()> { + Err(io::const_error!(io::ErrorKind::Unsupported, "setting file times not supported",)) +} + +pub fn set_times_nofollow(_p: &Path, _times: FileTimes) -> io::Result<()> { + Err(io::const_error!( + io::ErrorKind::Unsupported, + "setting file times on symlinks not supported", + )) +} + pub fn rmdir(p: &Path) -> io::Result<()> { if stat(p)?.file_type().is_dir() { error::SolidError::err_if_negative(unsafe { abi::SOLID_FS_Unlink(cstr(p)?.as_ptr()) }) diff --git a/std/src/sys/fs/uefi.rs b/std/src/sys/fs/uefi.rs index 5763d7862f5ae..e4e7274ae8cb3 100644 --- a/std/src/sys/fs/uefi.rs +++ b/std/src/sys/fs/uefi.rs @@ -333,6 +333,14 @@ pub fn set_perm(_p: &Path, _perm: FilePermissions) -> io::Result<()> { unsupported() } +pub fn set_times(_p: &Path, _times: FileTimes) -> io::Result<()> { + unsupported() +} + +pub fn set_times_nofollow(_p: &Path, _times: FileTimes) -> io::Result<()> { + unsupported() +} + pub fn rmdir(_p: &Path) -> io::Result<()> { unsupported() } diff --git a/std/src/sys/fs/unix.rs b/std/src/sys/fs/unix.rs index bed9ea9139834..578b5f4a1d986 100644 --- a/std/src/sys/fs/unix.rs +++ 
b/std/src/sys/fs/unix.rs
@@ -1195,6 +1195,55 @@ impl fmt::Debug for OpenOptions {
     }
 }
 
+#[cfg(not(any(
+    target_os = "redox",
+    target_os = "espidf",
+    target_os = "horizon",
+    target_os = "nuttx",
+)))]
+fn to_timespec(time: Option<SystemTime>) -> io::Result<libc::timespec> {
+    match time {
+        Some(time) if let Some(ts) = time.t.to_timespec() => Ok(ts),
+        Some(time) if time > crate::sys::time::UNIX_EPOCH => Err(io::const_error!(
+            io::ErrorKind::InvalidInput,
+            "timestamp is too large to set as a file time",
+        )),
+        Some(_) => Err(io::const_error!(
+            io::ErrorKind::InvalidInput,
+            "timestamp is too small to set as a file time",
+        )),
+        None => Ok(libc::timespec { tv_sec: 0, tv_nsec: libc::UTIME_OMIT as _ }),
+    }
+}
+
+#[cfg(target_vendor = "apple")]
+fn set_attrlist_with_times(
+    times: &FileTimes,
+) -> io::Result<(libc::attrlist, [mem::MaybeUninit<libc::timespec>; 3], usize)> {
+    let mut buf = [mem::MaybeUninit::<libc::timespec>::uninit(); 3];
+    let mut num_times = 0;
+    let mut attrlist: libc::attrlist = unsafe { mem::zeroed() };
+    attrlist.bitmapcount = libc::ATTR_BIT_MAP_COUNT;
+
+    if times.created.is_some() {
+        buf[num_times].write(to_timespec(times.created)?);
+        num_times += 1;
+        attrlist.commonattr |= libc::ATTR_CMN_CRTIME;
+    }
+    if times.modified.is_some() {
+        buf[num_times].write(to_timespec(times.modified)?);
+        num_times += 1;
+        attrlist.commonattr |= libc::ATTR_CMN_MODTIME;
+    }
+    if times.accessed.is_some() {
+        buf[num_times].write(to_timespec(times.accessed)?);
+        num_times += 1;
+        attrlist.commonattr |= libc::ATTR_CMN_ACCTIME;
+    }
+
+    Ok((attrlist, buf, num_times))
+}
+
 impl File {
     pub fn open(path: &Path, opts: &OpenOptions) -> io::Result<File> {
         run_path_with_cstr(path, &|path| File::open_c(path, opts))
@@ -2112,6 +2161,84 @@ fn open_from(from: &Path) -> io::Result<(crate::fs::File, crate::fs::Metadata)> {
     Ok((reader, metadata))
 }
 
+fn set_times_impl(p: &CStr, times: FileTimes, flags: c_int) -> io::Result<()> {
+    cfg_select! {
+        any(target_os = "redox", target_os = "espidf", target_os = "horizon", target_os = "nuttx") => {
+            let _ = (p, times, flags);
+            Err(io::const_error!(
+                io::ErrorKind::Unsupported,
+                "setting file times not supported",
+            ))
+        }
+        target_vendor = "apple" => {
+            // Apple platforms use setattrlist which supports setting times on symlinks
+            let (attrlist, buf, num_times) = set_attrlist_with_times(&times)?;
+            let options = if flags == libc::AT_SYMLINK_NOFOLLOW {
+                libc::FSOPT_NOFOLLOW
+            } else {
+                0
+            };
+
+            cvt(unsafe { libc::setattrlist(
+                p.as_ptr(),
+                (&raw const attrlist).cast::<libc::c_void>().cast_mut(),
+                buf.as_ptr().cast::<libc::c_void>().cast_mut(),
+                num_times * size_of::<libc::timespec>(),
+                options as u32
+            ) })?;
+            Ok(())
+        }
+        target_os = "android" => {
+            let times = [to_timespec(times.accessed)?, to_timespec(times.modified)?];
+            // utimensat requires Android API level 19
+            cvt(unsafe {
+                weak!(
+                    fn utimensat(dirfd: c_int, path: *const c_char, times: *const libc::timespec, flags: c_int) -> c_int;
+                );
+                match utimensat.get() {
+                    Some(utimensat) => utimensat(libc::AT_FDCWD, p.as_ptr(), times.as_ptr(), flags),
+                    None => return Err(io::const_error!(
+                        io::ErrorKind::Unsupported,
+                        "setting file times requires Android API level >= 19",
+                    )),
+                }
+            })?;
+            Ok(())
+        }
+        _ => {
+            #[cfg(all(target_os = "linux", target_env = "gnu", target_pointer_width = "32", not(target_arch = "riscv32")))]
+            {
+                use crate::sys::{time::__timespec64, weak::weak};
+
+                // Added in glibc 2.34
+                weak!(
+                    fn __utimensat64(dirfd: c_int, path: *const c_char, times: *const __timespec64, flags: c_int) -> c_int;
+                );
+
+                if let Some(utimensat64) = __utimensat64.get() {
+                    let to_timespec = |time: Option<SystemTime>| time.map(|time| time.t.to_timespec64())
+                        .unwrap_or(__timespec64::new(0, libc::UTIME_OMIT as _));
+                    let times = [to_timespec(times.accessed), to_timespec(times.modified)];
+                    cvt(unsafe { utimensat64(libc::AT_FDCWD, p.as_ptr(), times.as_ptr(), flags) })?;
+                    return Ok(());
+                }
+            }
+            let times = [to_timespec(times.accessed)?, to_timespec(times.modified)?];
+            cvt(unsafe { libc::utimensat(libc::AT_FDCWD, p.as_ptr(), times.as_ptr(), flags) })?;
+            Ok(())
+        }
+    }
+}
+
+pub fn set_times(p: &CStr, times: FileTimes) -> io::Result<()> {
+    // flags = 0 means follow symlinks
+    set_times_impl(p, times, 0)
+}
+
+pub fn set_times_nofollow(p: &CStr, times: FileTimes) -> io::Result<()> {
+    set_times_impl(p, times, libc::AT_SYMLINK_NOFOLLOW)
+}
+
 #[cfg(target_os = "espidf")]
 fn open_to_and_set_permissions(
     to: &Path,
diff --git a/std/src/sys/fs/unsupported.rs b/std/src/sys/fs/unsupported.rs
index efaddb51b3751..659ea2a8fc276 100644
--- a/std/src/sys/fs/unsupported.rs
+++ b/std/src/sys/fs/unsupported.rs
@@ -312,6 +312,14 @@ pub fn set_perm(_p: &Path, perm: FilePermissions) -> io::Result<()> {
     match perm.0 {}
 }
 
+pub fn set_times(_p: &Path, times: FileTimes) -> io::Result<()> {
+    match times {}
+}
+
+pub fn set_times_nofollow(_p: &Path, times: FileTimes) -> io::Result<()> {
+    match times {}
+}
+
 pub fn rmdir(_p: &Path) -> io::Result<()> {
     unsupported()
 }
diff --git a/std/src/sys/fs/vexos.rs b/std/src/sys/fs/vexos.rs
index f642e7cb074ec..99b156d535768 100644
--- a/std/src/sys/fs/vexos.rs
+++ b/std/src/sys/fs/vexos.rs
@@ -492,6 +492,14 @@ pub fn set_perm(_p: &Path, _perm: FilePermissions) -> io::Result<()> {
     unsupported()
 }
 
+pub fn set_times(_p: &Path, _times: FileTimes) -> io::Result<()> {
+    unsupported()
+}
+
+pub fn set_times_nofollow(_p: &Path, _times: FileTimes) -> io::Result<()> {
+    unsupported()
+}
+
 pub fn exists(path: &Path) -> io::Result<bool> {
     run_path_with_cstr(path, &|path| Ok(unsafe { vex_sdk::vexFileStatus(path.as_ptr()) } != 0))
 }
diff --git a/std/src/sys/fs/wasi.rs b/std/src/sys/fs/wasi.rs
index 0b65b9cb389df..1e6c0fad5b830 100644
--- a/std/src/sys/fs/wasi.rs
+++ b/std/src/sys/fs/wasi.rs
@@ -643,6 +643,18 @@ pub fn set_perm(_p: &Path, _perm: FilePermissions) -> io::Result<()> {
     unsupported()
 }
 
+pub fn set_times(_p: &Path, _times: FileTimes) -> io::Result<()> {
+    // File times haven't been fully figured out in wasi yet, so this is
+    // likely temporary
+    unsupported()
+}
+
+pub fn set_times_nofollow(_p: &Path, _times: FileTimes) -> io::Result<()> {
+    // File times haven't been fully figured out in wasi yet, so this is
+    // likely temporary
+    unsupported()
+}
+
 pub fn rmdir(p: &Path) -> io::Result<()> {
     let (dir, file) = open_parent(p)?;
     dir.remove_directory(osstr2str(file.as_ref())?)
diff --git a/std/src/sys/fs/windows.rs b/std/src/sys/fs/windows.rs
index ccfe410627f70..f2d325da35c7d 100644
--- a/std/src/sys/fs/windows.rs
+++ b/std/src/sys/fs/windows.rs
@@ -1514,6 +1514,23 @@ pub fn set_perm(p: &WCStr, perm: FilePermissions) -> io::Result<()> {
     }
 }
 
+pub fn set_times(p: &WCStr, times: FileTimes) -> io::Result<()> {
+    let mut opts = OpenOptions::new();
+    opts.write(true);
+    opts.custom_flags(c::FILE_FLAG_BACKUP_SEMANTICS);
+    let file = File::open_native(p, &opts)?;
+    file.set_times(times)
+}
+
+pub fn set_times_nofollow(p: &WCStr, times: FileTimes) -> io::Result<()> {
+    let mut opts = OpenOptions::new();
+    opts.write(true);
+    // `FILE_FLAG_OPEN_REPARSE_POINT` for no_follow behavior
+    opts.custom_flags(c::FILE_FLAG_BACKUP_SEMANTICS | c::FILE_FLAG_OPEN_REPARSE_POINT);
+    let file = File::open_native(p, &opts)?;
+    file.set_times(times)
+}
+
 fn get_path(f: &File) -> io::Result<PathBuf> {
     fill_utf16_buf(
         |buf, sz| unsafe {
+/// This function currently corresponds to the `utimensat` function on Unix platforms, the +/// `setattrlist` function on Apple platforms, and the `SetFileTime` function on Windows. /// /// # Errors /// From f5082fc69383ac2a1329814c65592431c32fda85 Mon Sep 17 00:00:00 2001 From: yukang Date: Thu, 9 Oct 2025 10:23:34 +0800 Subject: [PATCH 064/358] use proper unsupported --- std/src/sys/fs/solid.rs | 7 ++----- std/src/sys/fs/unix.rs | 2 ++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/std/src/sys/fs/solid.rs b/std/src/sys/fs/solid.rs index 39bd9b3cdd70b..f6d5d3b784d3b 100644 --- a/std/src/sys/fs/solid.rs +++ b/std/src/sys/fs/solid.rs @@ -539,14 +539,11 @@ pub fn set_perm(p: &Path, perm: FilePermissions) -> io::Result<()> { } pub fn set_times(_p: &Path, _times: FileTimes) -> io::Result<()> { - Err(io::const_error!(io::ErrorKind::Unsupported, "setting file times not supported",)) + unsupported() } pub fn set_times_nofollow(_p: &Path, _times: FileTimes) -> io::Result<()> { - Err(io::const_error!( - io::ErrorKind::Unsupported, - "setting file times on symlinks not supported", - )) + unsupported() } pub fn rmdir(p: &Path) -> io::Result<()> { diff --git a/std/src/sys/fs/unix.rs b/std/src/sys/fs/unix.rs index 578b5f4a1d986..014b10f06176c 100644 --- a/std/src/sys/fs/unix.rs +++ b/std/src/sys/fs/unix.rs @@ -1201,6 +1201,7 @@ impl fmt::Debug for OpenOptions { target_os = "horizon", target_os = "nuttx", )))] +#[inline(always)] fn to_timespec(time: Option) -> io::Result { match time { Some(time) if let Some(ts) = time.t.to_timespec() => Ok(ts), @@ -1217,6 +1218,7 @@ fn to_timespec(time: Option) -> io::Result { } #[cfg(target_vendor = "apple")] +#[inline(always)] fn set_attrlist_with_times( times: &FileTimes, ) -> io::Result<(libc::attrlist, [mem::MaybeUninit; 3], usize)> { From 59d4b6ed5e03fad14d6deafc9c19c34626c2380d Mon Sep 17 00:00:00 2001 From: yukang Date: Thu, 9 Oct 2025 10:49:13 +0800 Subject: [PATCH 065/358] rebase #147504 --- std/src/sys/fs/unix.rs | 73 ++++++++---------------------------------- 1 file changed, 13 insertions(+), 60 deletions(-) diff --git a/std/src/sys/fs/unix.rs b/std/src/sys/fs/unix.rs index 014b10f06176c..63a6db77324a9 100644 --- a/std/src/sys/fs/unix.rs +++ b/std/src/sys/fs/unix.rs @@ -1195,57 +1195,6 @@ impl fmt::Debug for OpenOptions { } } -#[cfg(not(any( - target_os = "redox", - target_os = "espidf", - target_os = "horizon", - target_os = "nuttx", -)))] -#[inline(always)] -fn to_timespec(time: Option) -> io::Result { - match time { - Some(time) if let Some(ts) = time.t.to_timespec() => Ok(ts), - Some(time) if time > crate::sys::time::UNIX_EPOCH => Err(io::const_error!( - io::ErrorKind::InvalidInput, - "timestamp is too large to set as a file time", - )), - Some(_) => Err(io::const_error!( - io::ErrorKind::InvalidInput, - "timestamp is too small to set as a file time", - )), - None => Ok(libc::timespec { tv_sec: 0, tv_nsec: libc::UTIME_OMIT as _ }), - } -} - -#[cfg(target_vendor = "apple")] -#[inline(always)] -fn set_attrlist_with_times( - times: &FileTimes, -) -> io::Result<(libc::attrlist, [mem::MaybeUninit; 3], usize)> { - let mut buf = [mem::MaybeUninit::::uninit(); 3]; - let mut num_times = 0; - let mut attrlist: libc::attrlist = unsafe { mem::zeroed() }; - attrlist.bitmapcount = libc::ATTR_BIT_MAP_COUNT; - - if times.created.is_some() { - buf[num_times].write(to_timespec(times.created)?); - num_times += 1; - attrlist.commonattr |= libc::ATTR_CMN_CRTIME; - } - if times.modified.is_some() { - buf[num_times].write(to_timespec(times.modified)?); - 
-        num_times += 1;
-        attrlist.commonattr |= libc::ATTR_CMN_MODTIME;
-    }
-    if times.accessed.is_some() {
-        buf[num_times].write(to_timespec(times.accessed)?);
-        num_times += 1;
-        attrlist.commonattr |= libc::ATTR_CMN_ACCTIME;
-    }
-
-    Ok((attrlist, buf, num_times))
-}
-
 impl File {
     pub fn open(path: &Path, opts: &OpenOptions) -> io::Result<File> {
         run_path_with_cstr(path, &|path| File::open_c(path, opts))
@@ -1760,18 +1709,19 @@ impl TimesAttrlist {
         if times.created.is_some() {
             this.buf[this.num_times].write(file_time_to_timespec(times.created)?);
             this.num_times += 1;
-            attrlist.commonattr |= libc::ATTR_CMN_CRTIME;
+            this.attrlist.commonattr |= libc::ATTR_CMN_CRTIME;
         }
         if times.modified.is_some() {
            this.buf[this.num_times].write(file_time_to_timespec(times.modified)?);
            this.num_times += 1;
-            attrlist.commonattr |= libc::ATTR_CMN_MODTIME;
+            this.attrlist.commonattr |= libc::ATTR_CMN_MODTIME;
         }
         if times.accessed.is_some() {
            this.buf[this.num_times].write(file_time_to_timespec(times.accessed)?);
            this.num_times += 1;
-            attrlist.commonattr |= libc::ATTR_CMN_ACCTIME;
+            this.attrlist.commonattr |= libc::ATTR_CMN_ACCTIME;
         }
+        Ok(this)
     }
 
     fn attrlist(&self) -> *mut libc::c_void {
@@ -2174,7 +2124,8 @@ fn set_times_impl(p: &CStr, times: FileTimes, flags: c_int) -> io::Result<()> {
     }
     target_vendor = "apple" => {
         // Apple platforms use setattrlist which supports setting times on symlinks
-        let (attrlist, buf, num_times) = set_attrlist_with_times(&times)?;
+        let ta = TimesAttrlist::from_times(&times)?;
         let options = if flags == libc::AT_SYMLINK_NOFOLLOW {
             libc::FSOPT_NOFOLLOW
         } else {
@@ -2183,15 +2134,15 @@ fn set_times_impl(p: &CStr, times: FileTimes, flags: c_int) -> io::Result<()> {
 
         cvt(unsafe { libc::setattrlist(
             p.as_ptr(),
-            (&raw const attrlist).cast::<libc::c_void>().cast_mut(),
-            buf.as_ptr().cast::<libc::c_void>().cast_mut(),
-            num_times * size_of::<libc::timespec>(),
+            ta.attrlist(),
+            ta.times_buf(),
+            ta.times_buf_size(),
             options as u32
         ) })?;
         Ok(())
     }
     target_os = "android" => {
-        let times = [to_timespec(times.accessed)?, to_timespec(times.modified)?];
+        let times = [file_time_to_timespec(times.accessed)?, file_time_to_timespec(times.modified)?];
         // utimensat requires Android API level 19
         cvt(unsafe {
             weak!(
@@ -2225,18 +2176,20 @@ fn set_times_impl(p: &CStr, times: FileTimes, flags: c_int) -> io::Result<()> {
             return Ok(());
         }
     }
-    let times = [to_timespec(times.accessed)?, to_timespec(times.modified)?];
+    let times = [file_time_to_timespec(times.accessed)?, file_time_to_timespec(times.modified)?];
     cvt(unsafe { libc::utimensat(libc::AT_FDCWD, p.as_ptr(), times.as_ptr(), flags) })?;
     Ok(())
 }
 }
 
+#[inline(always)]
 pub fn set_times(p: &CStr, times: FileTimes) -> io::Result<()> {
     // flags = 0 means follow symlinks
     set_times_impl(p, times, 0)
 }
 
+#[inline(always)]
 pub fn set_times_nofollow(p: &CStr, times: FileTimes) -> io::Result<()> {
     set_times_impl(p, times, libc::AT_SYMLINK_NOFOLLOW)
 }
From 69efb3a5c3df04758f932617ac1423c835a414ec Mon Sep 17 00:00:00 2001
From: yukang
Date: Thu, 9 Oct 2025 11:08:30 +0800
Subject: [PATCH 066/358] support fs::set_times for wasi

---
 std/src/sys/fs/wasi.rs | 55 ++++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/std/src/sys/fs/wasi.rs b/std/src/sys/fs/wasi.rs
index 1e6c0fad5b830..92eb35317415f 100644
--- a/std/src/sys/fs/wasi.rs
+++ b/std/src/sys/fs/wasi.rs
@@ -536,17 +536,9 @@ impl File {
     }
 
     pub fn set_times(&self, times: FileTimes) -> io::Result<()> {
-        let to_timestamp = |time: Option<SystemTime>| match time {
-            Some(time) if let Some(ts) = time.to_wasi_timestamp() => Ok(ts),
-            Some(_) => Err(io::const_error!(
-                io::ErrorKind::InvalidInput,
-                "timestamp is too large to set as a file time",
-            )),
-            None => Ok(0),
-        };
         self.fd.filestat_set_times(
-            to_timestamp(times.accessed)?,
-            to_timestamp(times.modified)?,
+            to_wasi_timestamp_or_now(times.accessed)?,
+            to_wasi_timestamp_or_now(times.modified)?,
             times.accessed.map_or(0, |_| wasi::FSTFLAGS_ATIM)
                 | times.modified.map_or(0, |_| wasi::FSTFLAGS_MTIM),
         )
@@ -643,16 +635,43 @@ pub fn set_perm(_p: &Path, _perm: FilePermissions) -> io::Result<()> {
     unsupported()
 }
 
-pub fn set_times(_p: &Path, _times: FileTimes) -> io::Result<()> {
-    // File times haven't been fully figured out in wasi yet, so this is
-    // likely temporary
-    unsupported()
+#[inline(always)]
+pub fn set_times(p: &Path, times: FileTimes) -> io::Result<()> {
+    let (dir, file) = open_parent(p)?;
+    set_times_impl(&dir, &file, times, wasi::LOOKUPFLAGS_SYMLINK_FOLLOW)
 }
 
-pub fn set_times_nofollow(_p: &Path, _times: FileTimes) -> io::Result<()> {
-    // File times haven't been fully figured out in wasi yet, so this is
-    // likely temporary
-    unsupported()
+#[inline(always)]
+pub fn set_times_nofollow(p: &Path, times: FileTimes) -> io::Result<()> {
+    let (dir, file) = open_parent(p)?;
+    set_times_impl(&dir, &file, times, 0)
+}
+
+fn to_wasi_timestamp_or_now(time: Option<SystemTime>) -> io::Result<wasi::Timestamp> {
+    match time {
+        Some(time) if let Some(ts) = time.to_wasi_timestamp() => Ok(ts),
+        Some(_) => Err(io::const_error!(
+            io::ErrorKind::InvalidInput,
+            "timestamp is too large to set as a file time",
+        )),
+        None => Ok(0),
+    }
+}
+
+fn set_times_impl(
+    fd: &WasiFd,
+    path: &Path,
+    times: FileTimes,
+    flags: wasi::Lookupflags,
+) -> io::Result<()> {
+    fd.path_filestat_set_times(
+        flags,
+        osstr2str(path.as_ref())?,
+        to_wasi_timestamp_or_now(times.accessed)?,
+        to_wasi_timestamp_or_now(times.modified)?,
+        times.accessed.map_or(0, |_| wasi::FSTFLAGS_ATIM)
+            | times.modified.map_or(0, |_| wasi::FSTFLAGS_MTIM),
+    )
 }
 
 pub fn rmdir(p: &Path) -> io::Result<()> {
From f4add620d38a80caa46e66137660df5ce2a1c5a1 Mon Sep 17 00:00:00 2001
From: Yukang
Date: Thu, 9 Oct 2025 11:37:48 +0800
Subject: [PATCH 067/358] add doc alias for set_times_nofollow

Co-authored-by: Josh Triplett
---
 std/src/fs.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/std/src/fs.rs b/std/src/fs.rs
index 60eefc36ae2be..9841a246d6a5d 100644
--- a/std/src/fs.rs
+++ b/std/src/fs.rs
@@ -458,6 +458,9 @@ pub fn set_times<P: AsRef<Path>>(path: P, times: FileTimes) -> io::Result<()> {
 /// }
 /// ```
 #[unstable(feature = "fs_set_times", issue = "147455")]
+#[doc(alias = "utimensat")]
+#[doc(alias = "lutimens")]
+#[doc(alias = "lutimes")]
 pub fn set_times_nofollow<P: AsRef<Path>>(path: P, times: FileTimes) -> io::Result<()> {
     fs_imp::set_times_nofollow(path.as_ref(), times.0)
 }
From eb8cce9fbb79a6e27b83fb5b6307fad7dae41af8 Mon Sep 17 00:00:00 2001
From: Yukang
Date: Thu, 9 Oct 2025 11:38:19 +0800
Subject: [PATCH 068/358] add doc alias for set_times

Co-authored-by: Josh Triplett
---
 std/src/fs.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/std/src/fs.rs b/std/src/fs.rs
index 9841a246d6a5d..b548eb4939d42 100644
--- a/std/src/fs.rs
+++ b/std/src/fs.rs
@@ -420,6 +420,9 @@ pub fn write<P: AsRef<Path>, C: AsRef<[u8]>>(path: P, contents: C) -> io::Result
 /// }
 /// ```
 #[unstable(feature = "fs_set_times", issue = "147455")]
+#[doc(alias = "utimens")]
+#[doc(alias = "utimes")]
+#[doc(alias = "utime")]
 pub fn set_times<P: AsRef<Path>>(path: P, times: FileTimes) -> io::Result<()> {
     fs_imp::set_times(path.as_ref(), times.0)
} From b4b7862994ba8277f1738e6294f0f314cdfea83b Mon Sep 17 00:00:00 2001 From: yukang Date: Thu, 9 Oct 2025 16:42:54 +0800 Subject: [PATCH 069/358] fix c_char error in Android --- std/src/sys/fs/unix.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/std/src/sys/fs/unix.rs b/std/src/sys/fs/unix.rs index 63a6db77324a9..51849a31f61b5 100644 --- a/std/src/sys/fs/unix.rs +++ b/std/src/sys/fs/unix.rs @@ -2146,7 +2146,7 @@ fn set_times_impl(p: &CStr, times: FileTimes, flags: c_int) -> io::Result<()> { // utimensat requires Android API level 19 cvt(unsafe { weak!( - fn utimensat(dirfd: c_int, path: *const c_char, times: *const libc::timespec, flags: c_int) -> c_int; + fn utimensat(dirfd: c_int, path: *const libc::c_char, times: *const libc::timespec, flags: c_int) -> c_int; ); match utimensat.get() { Some(utimensat) => utimensat(libc::AT_FDCWD, p.as_ptr(), times.as_ptr(), flags), From fb23cb75b2496dcaac073afef77da85136ae37cd Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 9 Oct 2025 06:36:51 -0700 Subject: [PATCH 070/358] unsupported: Use `unsupported()` for `set_times` --- std/src/sys/fs/unsupported.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/std/src/sys/fs/unsupported.rs b/std/src/sys/fs/unsupported.rs index 659ea2a8fc276..7901bf5624d74 100644 --- a/std/src/sys/fs/unsupported.rs +++ b/std/src/sys/fs/unsupported.rs @@ -312,8 +312,8 @@ pub fn set_perm(_p: &Path, perm: FilePermissions) -> io::Result<()> { match perm.0 {} } -pub fn set_times(_p: &Path, times: FileTimes) -> io::Result<()> { - match times {} +pub fn set_times(_p: &Path, _times: FileTimes) -> io::Result<()> { + unsupported() } pub fn set_times_nofollow(_p: &Path, times: FileTimes) -> io::Result<()> { From c259b92034c51c879da297073cd025bb8927f64f Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 9 Oct 2025 06:37:09 -0700 Subject: [PATCH 071/358] unsupported: Use `unsupported()` for `set_times_nofollow` --- std/src/sys/fs/unsupported.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/std/src/sys/fs/unsupported.rs b/std/src/sys/fs/unsupported.rs index 7901bf5624d74..f222151d18e25 100644 --- a/std/src/sys/fs/unsupported.rs +++ b/std/src/sys/fs/unsupported.rs @@ -316,8 +316,8 @@ pub fn set_times(_p: &Path, _times: FileTimes) -> io::Result<()> { unsupported() } -pub fn set_times_nofollow(_p: &Path, times: FileTimes) -> io::Result<()> { - match times {} +pub fn set_times_nofollow(_p: &Path, _times: FileTimes) -> io::Result<()> { + unsupported() } pub fn rmdir(_p: &Path) -> io::Result<()> { From 9f23d54ebb7f0e7e7393c426e6a6f73e0f47bf67 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 9 Oct 2025 19:17:55 +0200 Subject: [PATCH 072/358] crc32: remove `#[cfg(not(target_arch = "arm"))]` from crc functions They are defined in the aarch64 module, so this cfg is pointless. Note that these instructions do exist for arm, but the aarch64 ones are already stable, so this would need some additional work to implement them for arm. 
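For illustration only (not part of this patch): a minimal sketch of calling the
stable aarch64 CRC32C intrinsic behind runtime feature detection. The wrapper
names here are invented for the example.

```rust
#[cfg(target_arch = "aarch64")]
fn crc32c_u64(crc: u32, data: u64) -> Option<u32> {
    // `__crc32cd` is gated on the `crc` target feature, so detect it first.
    if std::arch::is_aarch64_feature_detected!("crc") {
        // SAFETY: the `crc` feature was just detected at runtime.
        Some(unsafe { crc32c_with_crc_feature(crc, data) })
    } else {
        None
    }
}

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "crc")]
unsafe fn crc32c_with_crc_feature(crc: u32, data: u64) -> u32 {
    std::arch::aarch64::__crc32cd(crc, data)
}
```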
--- stdarch/crates/core_arch/src/aarch64/neon/generated.rs | 2 -- stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml | 6 ------ 2 files changed, 8 deletions(-) diff --git a/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 855261aaecfd0..ef66149677e82 100644 --- a/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -16,7 +16,6 @@ use super::*; #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd)"] #[inline] #[target_feature(enable = "crc")] -#[cfg(not(target_arch = "arm"))] #[cfg_attr(test, assert_instr(crc32cx))] #[stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")] pub fn __crc32cd(crc: u32, data: u64) -> u32 { @@ -33,7 +32,6 @@ pub fn __crc32cd(crc: u32, data: u64) -> u32 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d)"] #[inline] #[target_feature(enable = "crc")] -#[cfg(not(target_arch = "arm"))] #[cfg_attr(test, assert_instr(crc32x))] #[stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")] pub fn __crc32d(crc: u32, data: u64) -> u32 { diff --git a/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index ccdcea980e1b2..770aa58dc5659 100644 --- a/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -13,10 +13,6 @@ auto_llvm_sign_conversion: false neon-stable: &neon-stable FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] -# #[cfg(not(target_arch = "arm"))] -target-not-arm: &target-not-arm - FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm"']]}]] - # #[cfg(not(target_arch = "arm64ec"))] target-not-arm64ec: &target-not-arm64ec FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm64ec"']]}]] @@ -13082,7 +13078,6 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [target_feature, ['enable = "crc"']] - - *target-not-arm - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32x"]] }]] - *aarch64-crc-stable safety: safe @@ -13104,7 +13099,6 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [target_feature, ['enable = "crc"']] - - *target-not-arm - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32cx"]] }]] - *aarch64-crc-stable safety: safe From 6b36f59342f89fa8ac36d3957bb2ddc4a9ebd73f Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Fri, 10 Oct 2025 12:51:41 +0200 Subject: [PATCH 073/358] remove `#[rustc_inherit_overflow_checks]` from `is_multiple_of` --- core/src/num/uint_macros.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index 752498bfbd815..b5b768cf677aa 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -3552,7 +3552,6 @@ macro_rules! uint_impl { #[rustc_const_stable(feature = "unsigned_is_multiple_of", since = "1.87.0")] #[must_use] #[inline] - #[rustc_inherit_overflow_checks] pub const fn is_multiple_of(self, rhs: Self) -> bool { match rhs { 0 => self == 0, From 51c1e8038d48ca58445e55ab579cc13020b1644a Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Fri, 10 Oct 2025 01:48:09 +0000 Subject: [PATCH 074/358] Remove StatementKind::Deinit. 
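For orientation (an illustrative sketch, not taken from this patch): custom-MIR
bodies are written with the `mir!` macro as below, and `Deinit(place)` was one
of the statement helpers usable in such bodies before this change removed it.

```rust
#![feature(custom_mir, core_intrinsics)]
use core::intrinsics::mir::*;

// A trivial single-block custom-MIR body. Before this change, a statement
// such as `Deinit(some_place);` could also have appeared in the block.
#[custom_mir(dialect = "built")]
fn identity(x: i32) -> i32 {
    mir! {
        {
            RET = x;
            Return()
        }
    }
}
```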
--- core/src/intrinsics/mir.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/intrinsics/mir.rs b/core/src/intrinsics/mir.rs index a800ef1cb9375..8ddce1c758438 100644 --- a/core/src/intrinsics/mir.rs +++ b/core/src/intrinsics/mir.rs @@ -227,7 +227,7 @@ //! //! #### Statements //! - Assign statements work via normal Rust assignment. -//! - [`Retag`], [`StorageLive`], [`StorageDead`], [`Deinit`] statements have an associated function. +//! - [`Retag`], [`StorageLive`], [`StorageDead`] statements have an associated function. //! //! #### Rvalues //! @@ -400,7 +400,6 @@ define!("mir_unwind_resume", define!("mir_storage_live", fn StorageLive(local: T)); define!("mir_storage_dead", fn StorageDead(local: T)); define!("mir_assume", fn Assume(operand: bool)); -define!("mir_deinit", fn Deinit(place: T)); define!("mir_checked", fn Checked(binop: T) -> (T, bool)); define!( "mir_ptr_metadata", From 4d10ff38a1f17452ce038cdffeac8b2f54810356 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Thu, 26 Jun 2025 13:44:01 +0000 Subject: [PATCH 075/358] Support #[alloc_error_handler] without the allocator shim Currently it is possible to avoid linking the allocator shim when __rust_no_alloc_shim_is_unstable_v2 is defined when linking rlibs directly as some build systems need. However this requires liballoc to be compiled with --cfg no_global_oom_handling, which places huge restrictions on what functions you can call and makes it impossible to use libstd. Or alternatively you have to define __rust_alloc_error_handler and (when using libstd) __rust_alloc_error_handler_should_panic using #[rustc_std_internal_symbol]. With this commit you can either use libstd and define __rust_alloc_error_handler_should_panic or not use libstd and use #[alloc_error_handler] instead. Both options are still unstable though. Eventually the alloc_error_handler may either be removed entirely (though the PR for that has been stale for years now) or we may start using weak symbols for it instead. For the latter case this commit is a prerequisite anyway. --- alloc/src/alloc.rs | 4 ++-- std/src/alloc.rs | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/alloc/src/alloc.rs b/alloc/src/alloc.rs index 65c8206e9d462..39450f69ce30a 100644 --- a/alloc/src/alloc.rs +++ b/alloc/src/alloc.rs @@ -361,7 +361,7 @@ unsafe fn exchange_malloc(size: usize, align: usize) -> *mut u8 { unsafe extern "Rust" { // This is the magic symbol to call the global alloc error handler. rustc generates // it to call `__rg_oom` if there is a `#[alloc_error_handler]`, or to call the - // default implementations below (`__rdl_oom`) otherwise. + // default implementations below (`__rdl_alloc_error_handler`) otherwise. #[rustc_std_internal_symbol] fn __rust_alloc_error_handler(size: usize, align: usize) -> !; } @@ -425,7 +425,7 @@ pub mod __alloc_error_handler { // called via generated `__rust_alloc_error_handler` if there is no // `#[alloc_error_handler]`. #[rustc_std_internal_symbol] - pub unsafe fn __rdl_oom(size: usize, _align: usize) -> ! { + pub unsafe fn __rdl_alloc_error_handler(size: usize, _align: usize) -> ! { unsafe extern "Rust" { // This symbol is emitted by rustc next to __rust_alloc_error_handler. // Its value depends on the -Zoom={panic,abort} compiler option. 
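(Illustrative aside, not part of the diff: with this change, a `no_std` setup
can supply its own OOM handler via the unstable attribute. A minimal sketch
follows; a real `no_std` binary additionally needs a `#[panic_handler]` and an
entry point, both elided here.)

```rust
#![feature(alloc_error_handler)]
#![no_std]

extern crate alloc;

use core::alloc::Layout;

// Called on allocation failure instead of the shim-provided default.
#[alloc_error_handler]
fn on_oom(_layout: Layout) -> ! {
    loop {}
}
```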
diff --git a/std/src/alloc.rs b/std/src/alloc.rs index 1d61630269ac3..daa25c5a50dd6 100644 --- a/std/src/alloc.rs +++ b/std/src/alloc.rs @@ -358,9 +358,10 @@ fn default_alloc_error_hook(layout: Layout) { // This is the default path taken on OOM, and the only path taken on stable with std. // Crucially, it does *not* call any user-defined code, and therefore users do not have to // worry about allocation failure causing reentrancy issues. That makes it different from - // the default `__rdl_oom` defined in alloc (i.e., the default alloc error handler that is - // called when there is no `#[alloc_error_handler]`), which triggers a regular panic and - // thus can invoke a user-defined panic hook, executing arbitrary user-defined code. + // the default `__rdl_alloc_error_handler` defined in alloc (i.e., the default alloc error + // handler that is called when there is no `#[alloc_error_handler]`), which triggers a + // regular panic and thus can invoke a user-defined panic hook, executing arbitrary + // user-defined code. rtprintpanic!("memory allocation of {} bytes failed\n", layout.size()); } } From ea540d0ef2cd8de580bf174b6995290dc200776d Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Thu, 9 Oct 2025 15:32:17 +0000 Subject: [PATCH 076/358] Implement fjcvtzs under the name __jcvt like the C intrinsic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This instruction is only available when the jsconv target_feature is available, so on ARMv8.3 or higher. It is used e.g. by Ruffle[0] to speed up its conversion from f64 to i32, or by any JS engine probably. I’ve picked the stdarch_aarch64_jscvt feature because it’s the name of the FEAT_JSCVT, but hesitated with naming it stdarch_aarch64_jsconv (the name of the target_feature) or stdarch_aarch64_jcvt (the name of the C intrinsic) or stdarch_aarch64_fjcvtzs (the name of the instruction), this choice is purely arbitrary and I guess it could be argued one way or another. I wouldn’t expect it to stay unstable for too long, so ultimately this shouldn’t matter much. This feature is now tracked in this issue[1]. 
[0] https://github.com/ruffle-rs/ruffle/pull/21780 [1] https://github.com/rust-lang/rust/issues/147555 --- .../core_arch/src/aarch64/neon/generated.rs | 16 +++++++++++++ .../crates/intrinsic-test/src/arm/config.rs | 1 + .../spec/neon/aarch64.spec.yml | 21 +++++++++++++++++ stdarch/intrinsics_data/arm_intrinsics.json | 23 +++++++++++++++++++ 4 files changed, 61 insertions(+) diff --git a/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index f53f618cd7ea8..de60811057589 100644 --- a/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -46,6 +46,22 @@ pub fn __crc32d(crc: u32, data: u64) -> u32 { } unsafe { ___crc32d(crc, data) } } +#[doc = "Floating-point JavaScript convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__jcvt)"] +#[inline] +#[target_feature(enable = "jsconv")] +#[cfg_attr(test, assert_instr(fjcvtzs))] +#[unstable(feature = "stdarch_aarch64_jscvt", issue = "147555")] +pub fn __jcvt(a: f64) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.fjcvtzs" + )] + fn ___jcvt(a: f64) -> i32; + } + unsafe { ___jcvt(a) } +} #[doc = "Signed Absolute difference and Accumulate Long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_s8)"] #[inline] diff --git a/stdarch/crates/intrinsic-test/src/arm/config.rs b/stdarch/crates/intrinsic-test/src/arm/config.rs index ba5c22b22b031..d9024eabfaf46 100644 --- a/stdarch/crates/intrinsic-test/src/arm/config.rs +++ b/stdarch/crates/intrinsic-test/src/arm/config.rs @@ -121,6 +121,7 @@ pub const AARCH_CONFIGURATIONS: &str = r#" #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_aarch64_jscvt))] #![feature(fmt_helpers_for_derive)] #![feature(stdarch_neon_f16)] diff --git a/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index a83963589c353..2818dccd124b4 100644 --- a/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -63,6 +63,9 @@ neon-unstable-f16: &neon-unstable-f16 neon-unstable-feat-lut: &neon-unstable-feat-lut FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']] +aarch64-unstable-jscvt: &aarch64-unstable-jscvt + FnCall: [unstable, ['feature = "stdarch_aarch64_jscvt"', 'issue = "147555"']] + # #[cfg(target_endian = "little")] little-endian: &little-endian FnCall: [cfg, ['target_endian = "little"']] @@ -14265,3 +14268,21 @@ intrinsics: - 'vluti4q_laneq_{neon_type[5]}_x2::' - - FnCall: [transmute, [a]] - b + + - name: "__jcvt" + doc: "Floating-point JavaScript convert to signed fixed-point, rounding toward zero" + arguments: ["a: {type}"] + return_type: "i32" + attr: + - FnCall: [target_feature, ['enable = "jsconv"']] + - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["fjcvtzs"]] }]] + - *aarch64-unstable-jscvt + safety: safe + types: + - f64 + compose: + - LLVMLink: + name: "fjcvtzs" + links: + 
- link: "llvm.aarch64.fjcvtzs" + arch: aarch64,arm64ec diff --git a/stdarch/intrinsics_data/arm_intrinsics.json b/stdarch/intrinsics_data/arm_intrinsics.json index 9d58aad49cd44..19c655cd6d24e 100644 --- a/stdarch/intrinsics_data/arm_intrinsics.json +++ b/stdarch/intrinsics_data/arm_intrinsics.json @@ -119753,5 +119753,28 @@ "LUTI4" ] ] + }, + { + "SIMD_ISA": "Neon", + "name": "__jcvt", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FJCVTZS" + ] + ] } ] From f281a2a99a941e58b9067ee3de9adad7bb345ea7 Mon Sep 17 00:00:00 2001 From: Asger Hautop Drewsen Date: Fri, 10 Oct 2025 18:01:03 +0200 Subject: [PATCH 077/358] Stabilize unsigned_nonzero_div_ceil --- core/src/num/nonzero.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/core/src/num/nonzero.rs b/core/src/num/nonzero.rs index d9184e3c9c229..fcdb65bd45c95 100644 --- a/core/src/num/nonzero.rs +++ b/core/src/num/nonzero.rs @@ -1373,7 +1373,6 @@ macro_rules! nonzero_integer_signedness_dependent_impls { /// # Examples /// /// ``` - /// # #![feature(unsigned_nonzero_div_ceil)] /// # use std::num::NonZero; #[doc = concat!("let one = NonZero::new(1", stringify!($Int), ").unwrap();")] #[doc = concat!("let max = NonZero::new(", stringify!($Int), "::MAX).unwrap();")] @@ -1383,7 +1382,11 @@ macro_rules! nonzero_integer_signedness_dependent_impls { #[doc = concat!("let three = NonZero::new(3", stringify!($Int), ").unwrap();")] /// assert_eq!(three.div_ceil(two), two); /// ``` - #[unstable(feature = "unsigned_nonzero_div_ceil", issue = "132968")] + #[stable(feature = "unsigned_nonzero_div_ceil", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable( + feature = "unsigned_nonzero_div_ceil", + since = "CURRENT_RUSTC_VERSION" + )] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] From 8d96e68f719a68a1dd9919bfc5ed95146f74c204 Mon Sep 17 00:00:00 2001 From: Stepan Koltsov Date: Fri, 10 Oct 2025 20:01:02 +0100 Subject: [PATCH 078/358] Fix documentation of Instant::now on mac --- std/src/time.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/std/src/time.rs b/std/src/time.rs index 31187adb6feae..87aaf9091f1bc 100644 --- a/std/src/time.rs +++ b/std/src/time.rs @@ -112,19 +112,19 @@ use crate::sys_common::{FromInner, IntoInner}; /// | Platform | System call | /// |-----------|----------------------------------------------------------------------| /// | SGX | [`insecure_time` usercall]. 
More information on [timekeeping in SGX] | -/// | UNIX | [clock_gettime (Monotonic Clock)] | -/// | Darwin | [clock_gettime (Monotonic Clock)] | -/// | VXWorks | [clock_gettime (Monotonic Clock)] | +/// | UNIX | [clock_gettime] with `CLOCK_MONOTONIC` | +/// | Darwin | [clock_gettime] with `CLOCK_UPTIME_RAW` | +/// | VXWorks | [clock_gettime] with `CLOCK_MONOTONIC` | /// | SOLID | `get_tim` | -/// | WASI | [__wasi_clock_time_get (Monotonic Clock)] | +/// | WASI | [__wasi_clock_time_get] with `monotonic` | /// | Windows | [QueryPerformanceCounter] | /// /// [currently]: crate::io#platform-specific-behavior /// [QueryPerformanceCounter]: https://docs.microsoft.com/en-us/windows/win32/api/profileapi/nf-profileapi-queryperformancecounter /// [`insecure_time` usercall]: https://edp.fortanix.com/docs/api/fortanix_sgx_abi/struct.Usercalls.html#method.insecure_time /// [timekeeping in SGX]: https://edp.fortanix.com/docs/concepts/rust-std/#codestdtimecode -/// [__wasi_clock_time_get (Monotonic Clock)]: https://github.com/WebAssembly/WASI/blob/main/legacy/preview1/docs.md#clock_time_get -/// [clock_gettime (Monotonic Clock)]: https://linux.die.net/man/3/clock_gettime +/// [__wasi_clock_time_get]: https://github.com/WebAssembly/WASI/blob/main/legacy/preview1/docs.md#clock_time_get +/// [clock_gettime]: https://linux.die.net/man/3/clock_gettime /// /// **Disclaimer:** These system calls might change over time. /// From 9e8727de6ef5ea3d8bfc4c8723e4da515eac9a0d Mon Sep 17 00:00:00 2001 From: cyrgani Date: Fri, 10 Oct 2025 22:19:04 +0200 Subject: [PATCH 079/358] rename `DecodeMut` to `Decode` --- proc_macro/src/bridge/client.rs | 6 +++--- proc_macro/src/bridge/mod.rs | 4 ++-- proc_macro/src/bridge/rpc.rs | 36 ++++++++++++++++----------------- proc_macro/src/bridge/server.rs | 10 ++++----- proc_macro/src/bridge/symbol.rs | 4 ++-- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/proc_macro/src/bridge/client.rs b/proc_macro/src/bridge/client.rs index 92558f2b7d9cc..bdaa865a998d6 100644 --- a/proc_macro/src/bridge/client.rs +++ b/proc_macro/src/bridge/client.rs @@ -58,7 +58,7 @@ macro_rules! define_client_handles { } } - impl DecodeMut<'_, '_, S> for $oty { + impl Decode<'_, '_, S> for $oty { fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { $oty { handle: handle::Handle::decode(r, s), @@ -82,7 +82,7 @@ macro_rules! define_client_handles { } } - impl DecodeMut<'_, '_, S> for $ity { + impl Decode<'_, '_, S> for $ity { fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { $ity { handle: handle::Handle::decode(r, s), @@ -276,7 +276,7 @@ fn maybe_install_panic_hook(force_show_panics: bool) { /// Client-side helper for handling client panics, entering the bridge, /// deserializing input and serializing output. // FIXME(eddyb) maybe replace `Bridge::enter` with this? -fn run_client DecodeMut<'a, 's, ()>, R: Encode<()>>( +fn run_client Decode<'a, 's, ()>, R: Encode<()>>( config: BridgeConfig<'_>, f: impl FnOnce(A) -> R, ) -> Buffer { diff --git a/proc_macro/src/bridge/mod.rs b/proc_macro/src/bridge/mod.rs index 582c43c78fcbb..b0ee9c0cc3027 100644 --- a/proc_macro/src/bridge/mod.rs +++ b/proc_macro/src/bridge/mod.rs @@ -143,7 +143,7 @@ mod symbol; use buffer::Buffer; pub use rpc::PanicMessage; -use rpc::{DecodeMut, Encode, Reader, Writer}; +use rpc::{Decode, Encode, Reader, Writer}; /// Configuration for establishing an active connection between a server and a /// client. 
The server creates the bridge config (`run_server` in `server.rs`), @@ -168,7 +168,7 @@ impl !Sync for BridgeConfig<'_> {} #[forbid(unsafe_code)] #[allow(non_camel_case_types)] mod api_tags { - use super::rpc::{DecodeMut, Encode, Reader, Writer}; + use super::rpc::{Decode, Encode, Reader, Writer}; macro_rules! declare_tags { ($($name:ident { diff --git a/proc_macro/src/bridge/rpc.rs b/proc_macro/src/bridge/rpc.rs index 6e5afe282d683..ed67674a74ab7 100644 --- a/proc_macro/src/bridge/rpc.rs +++ b/proc_macro/src/bridge/rpc.rs @@ -12,7 +12,7 @@ pub(super) trait Encode: Sized { pub(super) type Reader<'a> = &'a [u8]; -pub(super) trait DecodeMut<'a, 's, S>: Sized { +pub(super) trait Decode<'a, 's, S>: Sized { fn decode(r: &mut Reader<'a>, s: &'s mut S) -> Self; } @@ -24,7 +24,7 @@ macro_rules! rpc_encode_decode { } } - impl DecodeMut<'_, '_, S> for $ty { + impl Decode<'_, '_, S> for $ty { fn decode(r: &mut Reader<'_>, _: &mut S) -> Self { const N: usize = size_of::<$ty>(); @@ -43,12 +43,12 @@ macro_rules! rpc_encode_decode { } } - impl<'a, S, $($($T: for<'s> DecodeMut<'a, 's, S>),+)?> DecodeMut<'a, '_, S> + impl<'a, S, $($($T: for<'s> Decode<'a, 's, S>),+)?> Decode<'a, '_, S> for $name $(<$($T),+>)? { fn decode(r: &mut Reader<'a>, s: &mut S) -> Self { $name { - $($field: DecodeMut::decode(r, s)),* + $($field: Decode::decode(r, s)),* } } } @@ -69,7 +69,7 @@ macro_rules! rpc_encode_decode { } } - impl<'a, S, $($($T: for<'s> DecodeMut<'a, 's, S>),+)?> DecodeMut<'a, '_, S> + impl<'a, S, $($($T: for<'s> Decode<'a, 's, S>),+)?> Decode<'a, '_, S> for $name $(<$($T),+>)? { fn decode(r: &mut Reader<'a>, s: &mut S) -> Self { @@ -84,7 +84,7 @@ macro_rules! rpc_encode_decode { match u8::decode(r, s) { $(tag::$variant => { - $(let $field = DecodeMut::decode(r, s);)* + $(let $field = Decode::decode(r, s);)* $name::$variant $(($field))* })* _ => unreachable!(), @@ -98,7 +98,7 @@ impl Encode for () { fn encode(self, _: &mut Writer, _: &mut S) {} } -impl DecodeMut<'_, '_, S> for () { +impl Decode<'_, '_, S> for () { fn decode(_: &mut Reader<'_>, _: &mut S) -> Self {} } @@ -108,7 +108,7 @@ impl Encode for u8 { } } -impl DecodeMut<'_, '_, S> for u8 { +impl Decode<'_, '_, S> for u8 { fn decode(r: &mut Reader<'_>, _: &mut S) -> Self { let x = r[0]; *r = &r[1..]; @@ -125,7 +125,7 @@ impl Encode for bool { } } -impl DecodeMut<'_, '_, S> for bool { +impl Decode<'_, '_, S> for bool { fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { match u8::decode(r, s) { 0 => false, @@ -141,7 +141,7 @@ impl Encode for char { } } -impl DecodeMut<'_, '_, S> for char { +impl Decode<'_, '_, S> for char { fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { char::from_u32(u32::decode(r, s)).unwrap() } @@ -153,7 +153,7 @@ impl Encode for NonZero { } } -impl DecodeMut<'_, '_, S> for NonZero { +impl Decode<'_, '_, S> for NonZero { fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { Self::new(u32::decode(r, s)).unwrap() } @@ -166,11 +166,11 @@ impl, B: Encode> Encode for (A, B) { } } -impl<'a, S, A: for<'s> DecodeMut<'a, 's, S>, B: for<'s> DecodeMut<'a, 's, S>> DecodeMut<'a, '_, S> +impl<'a, S, A: for<'s> Decode<'a, 's, S>, B: for<'s> Decode<'a, 's, S>> Decode<'a, '_, S> for (A, B) { fn decode(r: &mut Reader<'a>, s: &mut S) -> Self { - (DecodeMut::decode(r, s), DecodeMut::decode(r, s)) + (Decode::decode(r, s), Decode::decode(r, s)) } } @@ -181,7 +181,7 @@ impl Encode for &[u8] { } } -impl<'a, S> DecodeMut<'a, '_, S> for &'a [u8] { +impl<'a, S> Decode<'a, '_, S> for &'a [u8] { fn decode(r: &mut Reader<'a>, s: &mut S) -> Self { let len = 
usize::decode(r, s); let xs = &r[..len]; @@ -196,7 +196,7 @@ impl Encode for &str { } } -impl<'a, S> DecodeMut<'a, '_, S> for &'a str { +impl<'a, S> Decode<'a, '_, S> for &'a str { fn decode(r: &mut Reader<'a>, s: &mut S) -> Self { str::from_utf8(<&[u8]>::decode(r, s)).unwrap() } @@ -208,7 +208,7 @@ impl Encode for String { } } -impl DecodeMut<'_, '_, S> for String { +impl Decode<'_, '_, S> for String { fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { <&str>::decode(r, s).to_string() } @@ -223,7 +223,7 @@ impl> Encode for Vec { } } -impl<'a, S, T: for<'s> DecodeMut<'a, 's, S>> DecodeMut<'a, '_, S> for Vec { +impl<'a, S, T: for<'s> Decode<'a, 's, S>> Decode<'a, '_, S> for Vec { fn decode(r: &mut Reader<'a>, s: &mut S) -> Self { let len = usize::decode(r, s); let mut vec = Vec::with_capacity(len); @@ -283,7 +283,7 @@ impl Encode for PanicMessage { } } -impl DecodeMut<'_, '_, S> for PanicMessage { +impl Decode<'_, '_, S> for PanicMessage { fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { match Option::::decode(r, s) { Some(s) => PanicMessage::String(s), diff --git a/proc_macro/src/bridge/server.rs b/proc_macro/src/bridge/server.rs index 2850e1099b700..e9ef26c07f24f 100644 --- a/proc_macro/src/bridge/server.rs +++ b/proc_macro/src/bridge/server.rs @@ -32,7 +32,7 @@ macro_rules! define_server_handles { } } - impl DecodeMut<'_, '_, HandleStore>> + impl Decode<'_, '_, HandleStore>> for Marked { fn decode(r: &mut Reader<'_>, s: &mut HandleStore>) -> Self { @@ -40,7 +40,7 @@ macro_rules! define_server_handles { } } - impl<'s, S: Types> DecodeMut<'_, 's, HandleStore>> + impl<'s, S: Types> Decode<'_, 's, HandleStore>> for &'s Marked { fn decode(r: &mut Reader<'_>, s: &'s mut HandleStore>) -> Self { @@ -48,7 +48,7 @@ macro_rules! define_server_handles { } } - impl<'s, S: Types> DecodeMut<'_, 's, HandleStore>> + impl<'s, S: Types> Decode<'_, 's, HandleStore>> for &'s mut Marked { fn decode( @@ -67,7 +67,7 @@ macro_rules! 
define_server_handles { } } - impl DecodeMut<'_, '_, HandleStore>> + impl Decode<'_, '_, HandleStore>> for Marked { fn decode(r: &mut Reader<'_>, s: &mut HandleStore>) -> Self { @@ -355,7 +355,7 @@ pub trait MessagePipe: Sized { fn run_server< S: Server, I: Encode>>, - O: for<'a, 's> DecodeMut<'a, 's, HandleStore>>, + O: for<'a, 's> Decode<'a, 's, HandleStore>>, >( strategy: &impl ExecutionStrategy, handle_counters: &'static client::HandleCounters, diff --git a/proc_macro/src/bridge/symbol.rs b/proc_macro/src/bridge/symbol.rs index eb7d30f9a6cc9..0d6a725fddd98 100644 --- a/proc_macro/src/bridge/symbol.rs +++ b/proc_macro/src/bridge/symbol.rs @@ -102,7 +102,7 @@ impl Encode for Symbol { } } -impl DecodeMut<'_, '_, server::HandleStore>> +impl Decode<'_, '_, server::HandleStore>> for Marked { fn decode(r: &mut Reader<'_>, s: &mut server::HandleStore>) -> Self { @@ -118,7 +118,7 @@ impl Encode>> } } -impl DecodeMut<'_, '_, S> for Symbol { +impl Decode<'_, '_, S> for Symbol { fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { Symbol::new(<&str>::decode(r, s)) } From 8fb1024ac7dc75ffe5df4c27e9285c1ad287723d Mon Sep 17 00:00:00 2001 From: sayantn Date: Sat, 11 Oct 2025 04:14:40 +0530 Subject: [PATCH 080/358] Disable AMD-specific intrinsic tests in the SDE run --- stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 3 +++ stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def | 7 +++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index c9951a77ff6c8..bbebe2d7fa6b6 100644 --- a/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -14,3 +14,6 @@ RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \ -rtm-mode full -tsx --" +# SDE doesn't support AMD extensions +# FIXME: find a way to test these +ENV STDARCH_TEST_SKIP_FEATURE="sse4a,tbm,xop" diff --git a/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def b/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def index 95cef6199311b..342f7d83a63e3 100644 --- a/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def +++ b/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def @@ -12,7 +12,7 @@ # CPUID_VERSION = 1.0 # Input => Output # EAX ECX => EAX EBX ECX EDX -00000000 ******** => 00000024 68747541 444d4163 69746e65 +00000000 ******** => 00000024 756e6547 6c65746e 49656e69 00000001 ******** => 00400f10 00100800 7ffaf3ff bfebfbff 00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000 00000003 ******** => 00000000 00000000 00000000 00000000 @@ -49,7 +49,7 @@ 00000024 00000000 => 00000001 00070002 00000000 00000000 #AVX10 00000024 00000001 => 00000000 00000000 00000004 00000000 80000000 ******** => 80000008 00000000 00000000 00000000 -80000001 ******** => 00000000 00000000 00200961 2c100000 +80000001 ******** => 00000000 00000000 00000121 2c100000 80000002 ******** => 00000000 00000000 00000000 00000000 80000003 ******** => 00000000 00000000 00000000 00000000 80000004 ******** => 00000000 00000000 00000000 00000000 @@ -59,5 +59,4 @@ 80000008 ******** => 00003028 00000200 00000200 00000000 # This file was copied from intel-sde/misc/cpuid/future/cpuid.def, and modified to -# use "AuthenticAMD" as the vendor and the support for `XOP`, `SSE4a`, `TBM`, -# `AVX512_VP2INTERSECT` and the VEX variants of AVX512 was added in the CPUID. 
+# add support for `AVX512_VP2INTERSECT` From ff980f23bd87f64ff2cd95e6c0ece272ae271761 Mon Sep 17 00:00:00 2001 From: Dawid Lachowicz Date: Fri, 25 Jul 2025 07:50:46 +0100 Subject: [PATCH 081/358] Guard HIR lowered contracts with contract_checks Refactor contract HIR lowering to ensure no contract code is executed when contract-checks are disabled. The call to contract_checks is moved to inside the lowered fn body, and contract closures are built conditionally, ensuring no side-effects present in contracts occur when those are disabled. --- core/src/intrinsics/mod.rs | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/core/src/intrinsics/mod.rs b/core/src/intrinsics/mod.rs index cef700be9ea1f..0d3be136d1325 100644 --- a/core/src/intrinsics/mod.rs +++ b/core/src/intrinsics/mod.rs @@ -2637,9 +2637,10 @@ pub const unsafe fn const_make_global(ptr: *mut u8) -> *const u8 { /// of not prematurely committing at compile-time to whether contract /// checking is turned on, so that we can specify contracts in libstd /// and let an end user opt into turning them on. -#[rustc_const_unstable(feature = "contracts_internals", issue = "128044" /* compiler-team#759 */)] #[unstable(feature = "contracts_internals", issue = "128044" /* compiler-team#759 */)] +#[rustc_const_unstable(feature = "contracts", issue = "128044")] #[inline(always)] +#[lang = "contract_checks"] #[rustc_intrinsic] pub const fn contract_checks() -> bool { // FIXME: should this be `false` or `cfg!(contract_checks)`? @@ -2668,7 +2669,7 @@ pub const fn contract_check_requires bool + Copy>(cond: C) { if const { // Do nothing } else { - if contract_checks() && !cond() { + if !cond() { // Emit no unwind panic in case this was a safety requirement. crate::panicking::panic_nounwind("failed requires check"); } @@ -2681,6 +2682,8 @@ pub const fn contract_check_requires bool + Copy>(cond: C) { /// By default, if `contract_checks` is enabled, this will panic with no unwind if the condition /// returns false. /// +/// If `cond` is `None`, then no postcondition checking is performed. +/// /// Note that this function is a no-op during constant evaluation. #[unstable(feature = "contracts_internals", issue = "128044")] // Similar to `contract_check_requires`, we need to use the user-facing @@ -2689,16 +2692,24 @@ pub const fn contract_check_requires bool + Copy>(cond: C) { #[rustc_const_unstable(feature = "contracts", issue = "128044")] #[lang = "contract_check_ensures"] #[rustc_intrinsic] -pub const fn contract_check_ensures bool + Copy, Ret>(cond: C, ret: Ret) -> Ret { +pub const fn contract_check_ensures bool + Copy, Ret>( + cond: Option, + ret: Ret, +) -> Ret { const_eval_select!( - @capture[C: Fn(&Ret) -> bool + Copy, Ret] { cond: C, ret: Ret } -> Ret : + @capture[C: Fn(&Ret) -> bool + Copy, Ret] { cond: Option, ret: Ret } -> Ret : if const { // Do nothing ret } else { - if contract_checks() && !cond(&ret) { - // Emit no unwind panic in case this was a safety requirement. - crate::panicking::panic_nounwind("failed ensures check"); + match cond { + crate::option::Option::Some(cond) => { + if !cond(&ret) { + // Emit no unwind panic in case this was a safety requirement. 
+ crate::panicking::panic_nounwind("failed ensures check"); + } + }, + crate::option::Option::None => {}, } ret } From 0e99d9a9eae629681709c881122fa631878dfa3d Mon Sep 17 00:00:00 2001 From: Dawid Lachowicz Date: Thu, 4 Sep 2025 08:24:00 +0100 Subject: [PATCH 082/358] Remove no longer used contract_checks intrinsic The contract_checks compiler flag is now used to determine if runtime contract checks should be enabled, as opposed to the compiler intrinsic as previously. --- core/src/intrinsics/mod.rs | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/core/src/intrinsics/mod.rs b/core/src/intrinsics/mod.rs index 0d3be136d1325..c397e762d5589 100644 --- a/core/src/intrinsics/mod.rs +++ b/core/src/intrinsics/mod.rs @@ -2631,24 +2631,6 @@ pub const unsafe fn const_make_global(ptr: *mut u8) -> *const u8 { ptr } -/// Returns whether we should perform contract-checking at runtime. -/// -/// This is meant to be similar to the ub_checks intrinsic, in terms -/// of not prematurely committing at compile-time to whether contract -/// checking is turned on, so that we can specify contracts in libstd -/// and let an end user opt into turning them on. -#[unstable(feature = "contracts_internals", issue = "128044" /* compiler-team#759 */)] -#[rustc_const_unstable(feature = "contracts", issue = "128044")] -#[inline(always)] -#[lang = "contract_checks"] -#[rustc_intrinsic] -pub const fn contract_checks() -> bool { - // FIXME: should this be `false` or `cfg!(contract_checks)`? - - // cfg!(contract_checks) - false -} - /// Check if the pre-condition `cond` has been met. /// /// By default, if `contract_checks` is enabled, this will panic with no unwind if the condition From 2c9c0de79cdcea3f6209a23d947a6e02c9a641d0 Mon Sep 17 00:00:00 2001 From: beepster4096 <19316085+beepster4096@users.noreply.github.com> Date: Wed, 13 Aug 2025 18:02:24 -0700 Subject: [PATCH 083/358] remove copyforderef from custom_mir it did not create DerefTemp locals when used, so it was never actually correct. --- core/src/intrinsics/mir.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/intrinsics/mir.rs b/core/src/intrinsics/mir.rs index a800ef1cb9375..85c5a2c5228e1 100644 --- a/core/src/intrinsics/mir.rs +++ b/core/src/intrinsics/mir.rs @@ -233,7 +233,7 @@ //! //! - Operands implicitly convert to `Use` rvalues. //! - `&`, `&mut`, `addr_of!`, and `addr_of_mut!` all work to create their associated rvalue. -//! - [`CopyForDeref`], [`CastTransmute`], [`CastPtrToPtr`], [`CastUnsize`], and [`Discriminant`] +//! - [`CastTransmute`], [`CastPtrToPtr`], [`CastUnsize`], and [`Discriminant`] //! have associated functions. //! - Unary and binary operations use their normal Rust syntax - `a * b`, `!c`, etc. //! - The binary operation `Offset` can be created via [`Offset`]. @@ -406,7 +406,6 @@ define!( "mir_ptr_metadata", fn PtrMetadata(place: *const P) ->
<P as ptr::Pointee>
::Metadata ); -define!("mir_copy_for_deref", fn CopyForDeref(place: T) -> T); define!("mir_retag", fn Retag(place: T)); define!("mir_move", fn Move(place: T) -> T); define!("mir_static", fn Static(s: T) -> &'static T); From 5a86dd5293113076c87fcfba6061e86e58225fd7 Mon Sep 17 00:00:00 2001 From: Tropical <42101043+tropicaaal@users.noreply.github.com> Date: Sat, 11 Oct 2025 14:42:26 -0500 Subject: [PATCH 084/358] vexos: implement `pal::os::exit` --- std/src/sys/pal/vexos/mod.rs | 1 - std/src/sys/pal/vexos/os.rs | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 std/src/sys/pal/vexos/os.rs diff --git a/std/src/sys/pal/vexos/mod.rs b/std/src/sys/pal/vexos/mod.rs index 61a34b0f68a30..3a37c8f2d4885 100644 --- a/std/src/sys/pal/vexos/mod.rs +++ b/std/src/sys/pal/vexos/mod.rs @@ -1,4 +1,3 @@ -#[path = "../unsupported/os.rs"] pub mod os; #[path = "../unsupported/pipe.rs"] pub mod pipe; diff --git a/std/src/sys/pal/vexos/os.rs b/std/src/sys/pal/vexos/os.rs new file mode 100644 index 0000000000000..405f7c918f4a5 --- /dev/null +++ b/std/src/sys/pal/vexos/os.rs @@ -0,0 +1,19 @@ +#[expect(dead_code)] +#[path = "../unsupported/os.rs"] +mod unsupported_os; +pub use unsupported_os::{ + JoinPathsError, SplitPaths, chdir, current_exe, errno, error_string, getcwd, getpid, home_dir, + join_paths, split_paths, temp_dir, +}; + +pub use super::unsupported; + +pub fn exit(_code: i32) -> ! { + unsafe { + vex_sdk::vexSystemExitRequest(); + + loop { + vex_sdk::vexTasksRun(); + } + } +} From bebc230eeb22bbeac589633db2db4df0c3f1413c Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Sat, 30 Aug 2025 18:23:34 +0000 Subject: [PATCH 085/358] Remove unreachable expression warning from std. --- std/src/sync/poison.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/std/src/sync/poison.rs b/std/src/sync/poison.rs index 17abdb9819bf9..5517082033380 100644 --- a/std/src/sync/poison.rs +++ b/std/src/sync/poison.rs @@ -60,6 +60,9 @@ //! while it is locked exclusively (write mode). If a panic occurs in any reader, //! then the lock will not be poisoned. +// If we are not unwinding, `PoisonError` is uninhabited. +#![cfg_attr(not(panic = "unwind"), expect(unreachable_code))] + #[stable(feature = "rust1", since = "1.0.0")] pub use self::condvar::Condvar; #[unstable(feature = "mapped_lock_guards", issue = "117108")] From 9a7c54ef583eaf1d053ed8566e1ebbb9cd92ce3f Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sun, 12 Oct 2025 20:14:54 +1100 Subject: [PATCH 086/358] Add doc links between `{integer}::from_str_radix` and `from_str` --- core/src/num/mod.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/core/src/num/mod.rs b/core/src/num/mod.rs index c75ee11d15efe..35141dfeb3a6d 100644 --- a/core/src/num/mod.rs +++ b/core/src/num/mod.rs @@ -1422,6 +1422,10 @@ macro_rules! from_str_int_impl { /// whitespace) represent an error. Underscores (which are accepted in Rust literals) /// also represent an error. /// + /// # See also + /// For parsing numbers in other bases, such as binary or hexadecimal, + /// see [`from_str_radix`][Self::from_str_radix]. + /// /// # Examples /// /// ``` @@ -1467,6 +1471,14 @@ macro_rules! from_str_int_impl { /// /// This function panics if `radix` is not in the range from 2 to 36. /// + /// # See also + /// If the string to be parsed is in base 10 (decimal), + /// [`from_str`] or [`str::parse`] can also be used. + /// + // FIXME(#122566): These HTML links work around a rustdoc-json test failure. 
+ /// [`from_str`]: #method.from_str + /// [`str::parse`]: primitive.str.html#method.parse + /// /// # Examples /// /// ``` From 747d87bf0fbbee96253c407ebcaf33ca56b7b06d Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Sun, 12 Oct 2025 19:12:15 -0400 Subject: [PATCH 087/358] Avoid redundant UB check in RangeFrom slice indexing --- core/src/slice/index.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/src/slice/index.rs b/core/src/slice/index.rs index de220e7e38a4b..a9806060d3d81 100644 --- a/core/src/slice/index.rs +++ b/core/src/slice/index.rs @@ -564,7 +564,10 @@ unsafe impl const SliceIndex<[T]> for ops::RangeFrom { slice_index_fail(self.start, slice.len(), slice.len()) } // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { &*self.get_unchecked(slice) } + unsafe { + let new_len = crate::intrinsics::unchecked_sub(slice.len(), self.start); + &*get_offset_len_noubcheck(slice, self.start, new_len) + } } #[inline] @@ -573,7 +576,10 @@ unsafe impl const SliceIndex<[T]> for ops::RangeFrom { slice_index_fail(self.start, slice.len(), slice.len()) } // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { &mut *self.get_unchecked_mut(slice) } + unsafe { + let new_len = crate::intrinsics::unchecked_sub(slice.len(), self.start); + &mut *get_offset_len_mut_noubcheck(slice, self.start, new_len) + } } } From 536213355fbbafdc65f114cf874abb309de71d76 Mon Sep 17 00:00:00 2001 From: Kivooeo Date: Tue, 14 Oct 2025 06:13:22 +0000 Subject: [PATCH 088/358] replace manual implementation with carrying_mul_add --- core/src/num/bignum.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/num/bignum.rs b/core/src/num/bignum.rs index f21fe0b4438fb..33b1e8cb56db2 100644 --- a/core/src/num/bignum.rs +++ b/core/src/num/bignum.rs @@ -38,9 +38,8 @@ macro_rules! impl_full_ops { fn full_mul_add(self, other: $ty, other2: $ty, carry: $ty) -> ($ty, $ty) { // This cannot overflow; // the output is between `0` and `2^nbits * (2^nbits - 1)`. - let v = (self as $bigty) * (other as $bigty) + (other2 as $bigty) + - (carry as $bigty); - ((v >> <$ty>::BITS) as $ty, v as $ty) + let (lo, hi) = self.carrying_mul_add(other, other2, carry); + (hi, lo) } fn full_div_rem(self, other: $ty, borrow: $ty) -> ($ty, $ty) { From 1611e77b8c240b74c2bafc6162d445a1445705dd Mon Sep 17 00:00:00 2001 From: cyrgani <85427285+cyrgani@users.noreply.github.com> Date: Tue, 14 Oct 2025 10:23:29 +0200 Subject: [PATCH 089/358] fix missing link to `std::char` in `std` docs --- std/src/lib.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/std/src/lib.rs b/std/src/lib.rs index 46eb120d65de3..60c8f53a0cc40 100644 --- a/std/src/lib.rs +++ b/std/src/lib.rs @@ -65,7 +65,7 @@ //! So for example there is a [page for the primitive type //! `char`](primitive::char) that lists all the methods that can be called on //! characters (very useful), and there is a [page for the module -//! `std::char`] that documents iterator and error types created by these methods +//! `std::char`](crate::char) that documents iterator and error types created by these methods //! (rarely useful). //! //! Note the documentation for the primitives [`str`] and [`[T]`][prim@slice] (also @@ -180,9 +180,6 @@ //! //! //! [I/O]: io -//! [`MIN`]: i32::MIN -//! [`MAX`]: i32::MAX -//! [page for the module `std::i32`]: crate::i32 //! [TCP]: net::TcpStream //! [The Rust Prelude]: prelude //! 
[UDP]: net::UdpSocket From 631b2d22224dcb3902a82a21d2772a003368924b Mon Sep 17 00:00:00 2001 From: joboet Date: Sat, 13 Sep 2025 13:01:11 +0200 Subject: [PATCH 090/358] std: improve handling of timed condition variable waits on macOS --- Cargo.lock | 4 +- std/Cargo.toml | 2 +- std/src/sys/pal/unix/sync/condvar.rs | 99 ++++++++++++++++++++++------ std/tests/sync/condvar.rs | 32 +++++++++ 4 files changed, 115 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 47fbf5169f491..b5b534c986b99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -139,9 +139,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.175" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" dependencies = [ "rustc-std-workspace-core", ] diff --git a/std/Cargo.toml b/std/Cargo.toml index 779b07ce240a6..d4108a57acaf0 100644 --- a/std/Cargo.toml +++ b/std/Cargo.toml @@ -33,7 +33,7 @@ miniz_oxide = { version = "0.8.0", optional = true, default-features = false } addr2line = { version = "0.25.0", optional = true, default-features = false } [target.'cfg(not(all(windows, target_env = "msvc")))'.dependencies] -libc = { version = "0.2.172", default-features = false, features = [ +libc = { version = "0.2.177", default-features = false, features = [ 'rustc-dep-of-std', ], public = true } diff --git a/std/src/sys/pal/unix/sync/condvar.rs b/std/src/sys/pal/unix/sync/condvar.rs index efa6f8d776559..b6c3ba4136f2a 100644 --- a/std/src/sys/pal/unix/sync/condvar.rs +++ b/std/src/sys/pal/unix/sync/condvar.rs @@ -1,11 +1,6 @@ use super::Mutex; use crate::cell::UnsafeCell; use crate::pin::Pin; -#[cfg(not(target_os = "nto"))] -use crate::sys::pal::time::TIMESPEC_MAX; -#[cfg(target_os = "nto")] -use crate::sys::pal::time::TIMESPEC_MAX_CAPPED; -use crate::sys::pal::time::Timespec; use crate::time::Duration; pub struct Condvar { @@ -47,27 +42,29 @@ impl Condvar { let r = unsafe { libc::pthread_cond_wait(self.raw(), mutex.raw()) }; debug_assert_eq!(r, 0); } +} +#[cfg(not(target_vendor = "apple"))] +impl Condvar { /// # Safety /// * `init` must have been called on this instance. /// * `mutex` must be locked by the current thread. /// * This condition variable may only be used with the same mutex. pub unsafe fn wait_timeout(&self, mutex: Pin<&Mutex>, dur: Duration) -> bool { + #[cfg(not(target_os = "nto"))] + use crate::sys::pal::time::TIMESPEC_MAX; + #[cfg(target_os = "nto")] + use crate::sys::pal::time::TIMESPEC_MAX_CAPPED; + use crate::sys::pal::time::Timespec; + let mutex = mutex.raw(); - // OSX implementation of `pthread_cond_timedwait` is buggy - // with super long durations. When duration is greater than - // 0x100_0000_0000_0000 seconds, `pthread_cond_timedwait` - // in macOS Sierra returns error 316. - // - // This program demonstrates the issue: - // https://gist.github.com/stepancheg/198db4623a20aad2ad7cddb8fda4a63c - // - // To work around this issue, the timeout is clamped to 1000 years. - // - // Cygwin implementation is based on NT API and a super large timeout - // makes the syscall block forever. - #[cfg(any(target_vendor = "apple", target_os = "cygwin"))] + // Cygwin's implementation is based on the NT API, which measures time + // in units of 100 ns. 
Unfortunately, Cygwin does not properly guard + // against overflow when converting the time, hence we clamp the interval + // to 1000 years, which will only become a problem in around 27000 years, + // when the next rollover is less than 1000 years away... + #[cfg(target_os = "cygwin")] let dur = Duration::min(dur, Duration::from_secs(1000 * 365 * 86400)); let timeout = Timespec::now(Self::CLOCK).checked_add_duration(&dur); @@ -84,6 +81,57 @@ impl Condvar { } } +// Apple platforms (since macOS version 10.4 and iOS version 2.0) have +// `pthread_cond_timedwait_relative_np`, a non-standard extension that +// measures timeouts based on the monotonic clock and is thus resilient +// against wall-clock changes. +#[cfg(target_vendor = "apple")] +impl Condvar { + /// # Safety + /// * `init` must have been called on this instance. + /// * `mutex` must be locked by the current thread. + /// * This condition variable may only be used with the same mutex. + pub unsafe fn wait_timeout(&self, mutex: Pin<&Mutex>, dur: Duration) -> bool { + let mutex = mutex.raw(); + + // The macOS implementation of `pthread_cond_timedwait` internally + // converts the timeout passed to `pthread_cond_timedwait_relative_np` + // to nanoseconds. Unfortunately, the "psynch" variant of condvars does + // not guard against overflow during the conversion[^1], which means + // that `pthread_cond_timedwait_relative_np` will return `ETIMEDOUT` + // much earlier than expected if the relative timeout is longer than + // `u64::MAX` nanoseconds. + // + // This can be observed even on newer platforms (by setting the environment + // variable PTHREAD_MUTEX_USE_ULOCK to a value other than "1") by calling e.g. + // ``` + // condvar.wait_timeout(..., Duration::from_secs(u64::MAX.div_ceil(1_000_000_000)); + // ``` + // (see #37440, especially + // https://github.com/rust-lang/rust/issues/37440#issuecomment-3285958326). + // + // To work around this issue, always clamp the timeout to u64::MAX nanoseconds, + // even if the "ulock" variant is used (which does guard against overflow). + // + // [^1]: https://github.com/apple-oss-distributions/libpthread/blob/1ebf56b3a702df53213c2996e5e128a535d2577e/kern/kern_synch.c#L1269 + const MAX_DURATION: Duration = Duration::from_nanos(u64::MAX); + + let (dur, clamped) = if dur <= MAX_DURATION { (dur, false) } else { (MAX_DURATION, true) }; + + let timeout = libc::timespec { + // This cannot overflow because of the clamping above. + tv_sec: dur.as_secs() as i64, + tv_nsec: dur.subsec_nanos() as i64, + }; + + let r = unsafe { libc::pthread_cond_timedwait_relative_np(self.raw(), mutex, &timeout) }; + assert!(r == libc::ETIMEDOUT || r == 0); + // Report clamping as a spurious wakeup. Who knows, maybe some + // interstellar space probe will rely on this ;-). + r == 0 || clamped + } +} + #[cfg(not(any( target_os = "android", target_vendor = "apple", @@ -125,10 +173,23 @@ impl Condvar { } } +#[cfg(target_vendor = "apple")] +impl Condvar { + // `pthread_cond_timedwait_relative_np` measures the timeout + // based on the monotonic clock. + pub const PRECISE_TIMEOUT: bool = true; + + /// # Safety + /// May only be called once per instance of `Self`. + pub unsafe fn init(self: Pin<&mut Self>) { + // `PTHREAD_COND_INITIALIZER` is fully supported and we don't need to + // change clocks, so there's nothing to do here. + } +} + // `pthread_condattr_setclock` is unfortunately not supported on these platforms. 
#[cfg(any( target_os = "android", - target_vendor = "apple", target_os = "espidf", target_os = "horizon", target_os = "l4re", diff --git a/std/tests/sync/condvar.rs b/std/tests/sync/condvar.rs index 1b1c33efad58e..2a525f9b5e948 100644 --- a/std/tests/sync/condvar.rs +++ b/std/tests/sync/condvar.rs @@ -267,3 +267,35 @@ nonpoison_and_poison_unwrap_test!( } } ); + +// Some platforms internally cast the timeout duration into nanoseconds. +// If they fail to consider overflow during the conversion (I'm looking +// at you, macOS), `wait_timeout` will return immediately and indicate a +// timeout for durations that are slightly longer than u64::MAX nanoseconds. +// `std` should guard against this by clamping the timeout. +// See #37440 for context. +nonpoison_and_poison_unwrap_test!( + name: timeout_nanoseconds, + test_body: { + use locks::Mutex; + use locks::Condvar; + + let sent = Mutex::new(false); + let cond = Condvar::new(); + + thread::scope(|s| { + s.spawn(|| { + thread::sleep(Duration::from_secs(2)); + maybe_unwrap(sent.set(true)); + cond.notify_all(); + }); + + let guard = maybe_unwrap(sent.lock()); + // If there is internal overflow, this call will return almost + // immediately, before the other thread has reached the `notify_all` + let (guard, res) = maybe_unwrap(cond.wait_timeout(guard, Duration::from_secs(u64::MAX.div_ceil(1_000_000_000)))); + assert!(!res.timed_out()); + assert!(*guard); + }) + } +); From 7b60d37dcc7d0f357dc5663373bbe92e95af941d Mon Sep 17 00:00:00 2001 From: joboet Date: Tue, 16 Sep 2025 12:13:01 +0200 Subject: [PATCH 091/358] std: reorganize the UNIX-internal `weak` module --- std/src/sys/pal/unix/mod.rs | 5 +- std/src/sys/pal/unix/stack_overflow.rs | 45 ++--- std/src/sys/pal/unix/weak.rs | 225 ---------------------- std/src/sys/pal/unix/weak/dlsym.rs | 104 ++++++++++ std/src/sys/pal/unix/weak/mod.rs | 52 +++++ std/src/sys/pal/unix/weak/syscall.rs | 19 ++ std/src/sys/pal/unix/weak/tests.rs | 20 +- std/src/sys/pal/unix/weak/weak_linkage.rs | 32 +++ 8 files changed, 232 insertions(+), 270 deletions(-) delete mode 100644 std/src/sys/pal/unix/weak.rs create mode 100644 std/src/sys/pal/unix/weak/dlsym.rs create mode 100644 std/src/sys/pal/unix/weak/mod.rs create mode 100644 std/src/sys/pal/unix/weak/syscall.rs create mode 100644 std/src/sys/pal/unix/weak/weak_linkage.rs diff --git a/std/src/sys/pal/unix/mod.rs b/std/src/sys/pal/unix/mod.rs index dd1059fe04a2d..9d303b8d65b39 100644 --- a/std/src/sys/pal/unix/mod.rs +++ b/std/src/sys/pal/unix/mod.rs @@ -2,10 +2,6 @@ use crate::io::ErrorKind; -#[cfg(not(target_os = "espidf"))] -#[macro_use] -pub mod weak; - #[cfg(target_os = "fuchsia")] pub mod fuchsia; pub mod futex; @@ -19,6 +15,7 @@ pub mod stack_overflow; pub mod sync; pub mod thread_parking; pub mod time; +pub mod weak; #[cfg(target_os = "espidf")] pub fn init(_argc: isize, _argv: *const *const u8, _sigpipe: u8) {} diff --git a/std/src/sys/pal/unix/stack_overflow.rs b/std/src/sys/pal/unix/stack_overflow.rs index 0d2100d66bc09..5e6f270be6ad3 100644 --- a/std/src/sys/pal/unix/stack_overflow.rs +++ b/std/src/sys/pal/unix/stack_overflow.rs @@ -69,7 +69,6 @@ mod imp { use super::Handler; use super::thread_info::{delete_current_info, set_current_info, with_current_info}; use crate::ops::Range; - use crate::sync::OnceLock; use crate::sync::atomic::{Atomic, AtomicBool, AtomicPtr, AtomicUsize, Ordering}; use crate::sys::pal::unix::os; use crate::{io, mem, panic, ptr}; @@ -396,6 +395,10 @@ mod imp { } else if cfg!(all(target_os = "linux", target_env = "musl")) { 
install_main_guard_linux_musl(page_size) } else if cfg!(target_os = "freebsd") { + #[cfg(not(target_os = "freebsd"))] + return None; + // The FreeBSD code cannot be checked on non-BSDs. + #[cfg(target_os = "freebsd")] install_main_guard_freebsd(page_size) } else if cfg!(any(target_os = "netbsd", target_os = "openbsd")) { install_main_guard_bsds(page_size) @@ -432,6 +435,7 @@ mod imp { } #[forbid(unsafe_op_in_unsafe_fn)] + #[cfg(target_os = "freebsd")] unsafe fn install_main_guard_freebsd(page_size: usize) -> Option> { // FreeBSD's stack autogrows, and optionally includes a guard page // at the bottom. If we try to remap the bottom of the stack @@ -443,38 +447,23 @@ mod imp { // by the security.bsd.stack_guard_page sysctl. // By default it is 1, checking once is enough since it is // a boot time config value. - static PAGES: OnceLock = OnceLock::new(); + static PAGES: crate::sync::OnceLock = crate::sync::OnceLock::new(); let pages = PAGES.get_or_init(|| { - use crate::sys::weak::dlsym; - dlsym!( - fn sysctlbyname( - name: *const libc::c_char, - oldp: *mut libc::c_void, - oldlenp: *mut libc::size_t, - newp: *const libc::c_void, - newlen: libc::size_t, - ) -> libc::c_int; - ); let mut guard: usize = 0; let mut size = size_of_val(&guard); let oid = c"security.bsd.stack_guard_page"; - match sysctlbyname.get() { - Some(fcn) - if unsafe { - fcn( - oid.as_ptr(), - (&raw mut guard).cast(), - &raw mut size, - ptr::null_mut(), - 0, - ) == 0 - } => - { - guard - } - _ => 1, - } + + let r = unsafe { + libc::sysctlbyname( + oid.as_ptr(), + (&raw mut guard).cast(), + &raw mut size, + ptr::null_mut(), + 0, + ) + }; + if r == 0 { guard } else { 1 } }); Some(guardaddr..guardaddr + pages * page_size) } diff --git a/std/src/sys/pal/unix/weak.rs b/std/src/sys/pal/unix/weak.rs deleted file mode 100644 index a3b980a3f3d85..0000000000000 --- a/std/src/sys/pal/unix/weak.rs +++ /dev/null @@ -1,225 +0,0 @@ -//! Support for "weak linkage" to symbols on Unix -//! -//! Some I/O operations we do in std require newer versions of OSes but we need -//! to maintain binary compatibility with older releases for now. In order to -//! use the new functionality when available we use this module for detection. -//! -//! One option to use here is weak linkage, but that is unfortunately only -//! really workable with ELF. Otherwise, use dlsym to get the symbol value at -//! runtime. This is also done for compatibility with older versions of glibc, -//! and to avoid creating dependencies on GLIBC_PRIVATE symbols. It assumes that -//! we've been dynamically linked to the library the symbol comes from, but that -//! is currently always the case for things like libpthread/libc. -//! -//! A long time ago this used weak linkage for the __pthread_get_minstack -//! symbol, but that caused Debian to detect an unnecessarily strict versioned -//! dependency on libc6 (#23628) because it is GLIBC_PRIVATE. We now use `dlsym` -//! for a runtime lookup of that symbol to avoid the ELF versioned dependency. - -// There are a variety of `#[cfg]`s controlling which targets are involved in -// each instance of `weak!` and `syscall!`. Rather than trying to unify all of -// that, we'll just allow that some unix targets don't use this module at all. 
-#![allow(dead_code, unused_macros)] -#![forbid(unsafe_op_in_unsafe_fn)] - -use crate::ffi::{CStr, c_char, c_void}; -use crate::marker::{FnPtr, PhantomData}; -use crate::sync::atomic::{Atomic, AtomicPtr, Ordering}; -use crate::{mem, ptr}; - -// We currently only test `dlsym!`, but that doesn't work on all platforms, so -// we gate the tests to only the platforms where it is actually used. -// -// FIXME(joboet): add more tests, reorganise the whole module and get rid of -// `#[allow(dead_code, unused_macros)]`. -#[cfg(any( - target_vendor = "apple", - all(target_os = "linux", target_env = "gnu"), - target_os = "freebsd", -))] -#[cfg(test)] -mod tests; - -// We can use true weak linkage on ELF targets. -#[cfg(all(unix, not(target_vendor = "apple")))] -pub(crate) macro weak { - (fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => ( - let ref $name: ExternWeak $ret> = { - unsafe extern "C" { - #[linkage = "extern_weak"] - static $name: Option $ret>; - } - #[allow(unused_unsafe)] - ExternWeak::new(unsafe { $name }) - }; - ) -} - -// On non-ELF targets, use the dlsym approximation of weak linkage. -#[cfg(target_vendor = "apple")] -pub(crate) use self::dlsym as weak; - -pub(crate) struct ExternWeak { - weak_ptr: Option, -} - -impl ExternWeak { - #[inline] - pub(crate) fn new(weak_ptr: Option) -> Self { - ExternWeak { weak_ptr } - } - - #[inline] - pub(crate) fn get(&self) -> Option { - self.weak_ptr - } -} - -pub(crate) macro dlsym { - (fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => ( - dlsym!( - #[link_name = stringify!($name)] - fn $name($($param : $t),*) -> $ret; - ); - ), - ( - #[link_name = $sym:expr] - fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty; - ) => ( - static DLSYM: DlsymWeak $ret> = { - let Ok(name) = CStr::from_bytes_with_nul(concat!($sym, '\0').as_bytes()) else { - panic!("symbol name may not contain NUL") - }; - - // SAFETY: Whoever calls the function pointer returned by `get()` - // is responsible for ensuring that the signature is correct. Just - // like with extern blocks, this is syntactically enforced by making - // the function pointer be unsafe. - unsafe { DlsymWeak::new(name) } - }; - - let $name = &DLSYM; - ) -} - -pub(crate) struct DlsymWeak { - /// A pointer to the nul-terminated name of the symbol. - // Use a pointer instead of `&'static CStr` to save space. - name: *const c_char, - func: Atomic<*mut libc::c_void>, - _marker: PhantomData, -} - -impl DlsymWeak { - /// # Safety - /// - /// If the signature of `F` does not match the signature of the symbol (if - /// it exists), calling the function pointer returned by `get()` is - /// undefined behaviour. - pub(crate) const unsafe fn new(name: &'static CStr) -> Self { - DlsymWeak { - name: name.as_ptr(), - func: AtomicPtr::new(ptr::without_provenance_mut(1)), - _marker: PhantomData, - } - } - - #[inline] - pub(crate) fn get(&self) -> Option { - // The caller is presumably going to read through this value - // (by calling the function we've dlsymed). This means we'd - // need to have loaded it with at least C11's consume - // ordering in order to be guaranteed that the data we read - // from the pointer isn't from before the pointer was - // stored. Rust has no equivalent to memory_order_consume, - // so we use an acquire load (sorry, ARM). - // - // Now, in practice this likely isn't needed even on CPUs - // where relaxed and consume mean different things. 
The - // symbols we're loading are probably present (or not) at - // init, and even if they aren't the runtime dynamic loader - // is extremely likely have sufficient barriers internally - // (possibly implicitly, for example the ones provided by - // invoking `mprotect`). - // - // That said, none of that's *guaranteed*, so we use acquire. - match self.func.load(Ordering::Acquire) { - func if func.addr() == 1 => self.initialize(), - func if func.is_null() => None, - // SAFETY: - // `func` is not null and `F` implements `FnPtr`, thus this - // transmutation is well-defined. It is the responsibility of the - // creator of this `DlsymWeak` to ensure that calling the resulting - // function pointer does not result in undefined behaviour (though - // the `dlsym!` macro delegates this responsibility to the caller - // of the function by using `unsafe` function pointers). - // FIXME: use `transmute` once it stops complaining about generics. - func => Some(unsafe { mem::transmute_copy::<*mut c_void, F>(&func) }), - } - } - - // Cold because it should only happen during first-time initialization. - #[cold] - fn initialize(&self) -> Option { - // SAFETY: `self.name` was created from a `&'static CStr` and is - // therefore a valid C string pointer. - let val = unsafe { libc::dlsym(libc::RTLD_DEFAULT, self.name) }; - // This synchronizes with the acquire load in `get`. - self.func.store(val, Ordering::Release); - - if val.is_null() { - None - } else { - // SAFETY: see the comment in `get`. - // FIXME: use `transmute` once it stops complaining about generics. - Some(unsafe { mem::transmute_copy::<*mut libc::c_void, F>(&val) }) - } - } -} - -unsafe impl Send for DlsymWeak {} -unsafe impl Sync for DlsymWeak {} - -#[cfg(not(any(target_os = "linux", target_os = "android")))] -pub(crate) macro syscall { - (fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => ( - unsafe fn $name($($param: $t),*) -> $ret { - weak!(fn $name($($param: $t),*) -> $ret;); - - if let Some(fun) = $name.get() { - unsafe { fun($($param),*) } - } else { - super::os::set_errno(libc::ENOSYS); - -1 - } - } - ) -} - -#[cfg(any(target_os = "linux", target_os = "android"))] -pub(crate) macro syscall { - ( - fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty; - ) => ( - unsafe fn $name($($param: $t),*) -> $ret { - weak!(fn $name($($param: $t),*) -> $ret;); - - // Use a weak symbol from libc when possible, allowing `LD_PRELOAD` - // interposition, but if it's not found just use a raw syscall. - if let Some(fun) = $name.get() { - unsafe { fun($($param),*) } - } else { - unsafe { libc::syscall(libc::${concat(SYS_, $name)}, $($param),*) as $ret } - } - } - ) -} - -#[cfg(any(target_os = "linux", target_os = "android"))] -pub(crate) macro raw_syscall { - (fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => ( - unsafe fn $name($($param: $t),*) -> $ret { - unsafe { libc::syscall(libc::${concat(SYS_, $name)}, $($param),*) as $ret } - } - ) -} diff --git a/std/src/sys/pal/unix/weak/dlsym.rs b/std/src/sys/pal/unix/weak/dlsym.rs new file mode 100644 index 0000000000000..4967b93cc52b5 --- /dev/null +++ b/std/src/sys/pal/unix/weak/dlsym.rs @@ -0,0 +1,104 @@ +use crate::ffi::{CStr, c_char, c_void}; +use crate::marker::{FnPtr, PhantomData}; +use crate::sync::atomic::{Atomic, AtomicPtr, Ordering}; +use crate::{mem, ptr}; + +#[cfg(test)] +#[path = "./tests.rs"] +mod tests; + +pub(crate) macro weak { + (fn $name:ident($($param:ident : $t:ty),* $(,)?) 
-> $ret:ty;) => (
+        static DLSYM: DlsymWeak<unsafe extern "C" fn($($t),*) -> $ret> = {
+            let Ok(name) = CStr::from_bytes_with_nul(concat!(stringify!($name), '\0').as_bytes()) else {
+                panic!("symbol name may not contain NUL")
+            };
+
+            // SAFETY: Whoever calls the function pointer returned by `get()`
+            // is responsible for ensuring that the signature is correct. Just
+            // like with extern blocks, this is syntactically enforced by making
+            // the function pointer be unsafe.
+            unsafe { DlsymWeak::new(name) }
+        };
+
+        let $name = &DLSYM;
+    )
+}
+
+pub(crate) struct DlsymWeak<F> {
+    /// A pointer to the nul-terminated name of the symbol.
+    // Use a pointer instead of `&'static CStr` to save space.
+    name: *const c_char,
+    func: Atomic<*mut libc::c_void>,
+    _marker: PhantomData<F>,
+}
+
+impl<F: FnPtr> DlsymWeak<F> {
+    /// # Safety
+    ///
+    /// If the signature of `F` does not match the signature of the symbol (if
+    /// it exists), calling the function pointer returned by `get()` is
+    /// undefined behaviour.
+    pub const unsafe fn new(name: &'static CStr) -> Self {
+        DlsymWeak {
+            name: name.as_ptr(),
+            func: AtomicPtr::new(ptr::without_provenance_mut(1)),
+            _marker: PhantomData,
+        }
+    }
+
+    #[inline]
+    pub fn get(&self) -> Option<F> {
+        // The caller is presumably going to read through this value
+        // (by calling the function we've dlsymed). This means we'd
+        // need to have loaded it with at least C11's consume
+        // ordering in order to be guaranteed that the data we read
+        // from the pointer isn't from before the pointer was
+        // stored. Rust has no equivalent to memory_order_consume,
+        // so we use an acquire load (sorry, ARM).
+        //
+        // Now, in practice this likely isn't needed even on CPUs
+        // where relaxed and consume mean different things. The
+        // symbols we're loading are probably present (or not) at
+        // init, and even if they aren't the runtime dynamic loader
+        // is extremely likely to have sufficient barriers internally
+        // (possibly implicitly, for example the ones provided by
+        // invoking `mprotect`).
+        //
+        // That said, none of that's *guaranteed*, so we use acquire.
+        match self.func.load(Ordering::Acquire) {
+            func if func.addr() == 1 => self.initialize(),
+            func if func.is_null() => None,
+            // SAFETY:
+            // `func` is not null and `F` implements `FnPtr`, thus this
+            // transmutation is well-defined. It is the responsibility of the
+            // creator of this `DlsymWeak` to ensure that calling the resulting
+            // function pointer does not result in undefined behaviour (though
+            // the `weak!` macro delegates this responsibility to the caller
+            // of the function by using `unsafe` function pointers).
+            // FIXME: use `transmute` once it stops complaining about generics.
+            func => Some(unsafe { mem::transmute_copy::<*mut c_void, F>(&func) }),
+        }
+    }
+
+    // Cold because it should only happen during first-time initialization.
+    #[cold]
+    fn initialize(&self) -> Option<F> {
+        // SAFETY: `self.name` was created from a `&'static CStr` and is
+        // therefore a valid C string pointer.
+        let val = unsafe { libc::dlsym(libc::RTLD_DEFAULT, self.name) };
+        // This synchronizes with the acquire load in `get`.
+        self.func.store(val, Ordering::Release);
+
+        if val.is_null() {
+            None
+        } else {
+            // SAFETY: see the comment in `get`.
+            // FIXME: use `transmute` once it stops complaining about generics.
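// (Editorial note, not part of the patch: `mem::transmute` requires source
// and destination to have a statically known, identical size, which the
// compiler cannot prove for a generic `F`. `transmute_copy` instead reads an
// `F` out of the pointer's own storage; the `F: FnPtr` bound guarantees that
// `F` really is a function pointer, so the sizes match in practice.)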
+            Some(unsafe { mem::transmute_copy::<*mut libc::c_void, F>(&val) })
+        }
+    }
+}
+
+unsafe impl<F> Send for DlsymWeak<F> {}
+unsafe impl<F> Sync for DlsymWeak<F> {}
diff --git a/std/src/sys/pal/unix/weak/mod.rs b/std/src/sys/pal/unix/weak/mod.rs
new file mode 100644
index 0000000000000..be53f148fc664
--- /dev/null
+++ b/std/src/sys/pal/unix/weak/mod.rs
@@ -0,0 +1,52 @@
+//! Support for "weak linkage" to symbols on Unix
+//!
+//! Some I/O operations we do in std require newer versions of OSes but we need
+//! to maintain binary compatibility with older releases for now. In order to
+//! use the new functionality when available we use this module for detection.
+//!
+//! One option to use here is weak linkage, but that is unfortunately only
+//! really workable with ELF. Otherwise, use dlsym to get the symbol value at
+//! runtime. This is also done for compatibility with older versions of glibc,
+//! and to avoid creating dependencies on GLIBC_PRIVATE symbols. It assumes that
+//! we've been dynamically linked to the library the symbol comes from, but that
+//! is currently always the case for things like libpthread/libc.
+//!
+//! A long time ago this used weak linkage for the __pthread_get_minstack
+//! symbol, but that caused Debian to detect an unnecessarily strict versioned
+//! dependency on libc6 (#23628) because it is GLIBC_PRIVATE. We now use `dlsym`
+//! for a runtime lookup of that symbol to avoid the ELF versioned dependency.
+
+#![forbid(unsafe_op_in_unsafe_fn)]
+
+cfg_select! {
+    // On non-ELF targets, use the dlsym approximation of weak linkage.
+    target_vendor = "apple" => {
+        mod dlsym;
+        pub(crate) use dlsym::weak;
+    }
+
+    // Some targets don't need or support weak linkage at all...
+    target_os = "espidf" => {}
+
+    // ... but ELF targets support true weak linkage.
+    _ => {
+        // There are a variety of `#[cfg]`s controlling which targets are involved in
+        // each instance of `weak!`. Rather than trying to unify all of
+        // that, we'll just allow that some unix targets don't use this macro at all.
+        #[cfg_attr(not(target_os = "linux"), allow(unused_macros, dead_code))]
+        mod weak_linkage;
+        #[cfg_attr(not(target_os = "linux"), allow(unused_imports))]
+        pub(crate) use weak_linkage::weak;
+    }
+}
+
+// GNU/Linux needs the `dlsym` variant to avoid linking to private glibc symbols.
+#[cfg(all(target_os = "linux", target_env = "gnu"))]
+mod dlsym;
+#[cfg(all(target_os = "linux", target_env = "gnu"))]
+pub(crate) use dlsym::weak as dlsym;
+
+#[cfg(any(target_os = "android", target_os = "linux"))]
+mod syscall;
+#[cfg(any(target_os = "android", target_os = "linux"))]
+pub(crate) use syscall::syscall;
diff --git a/std/src/sys/pal/unix/weak/syscall.rs b/std/src/sys/pal/unix/weak/syscall.rs
new file mode 100644
index 0000000000000..f1a60fb01d584
--- /dev/null
+++ b/std/src/sys/pal/unix/weak/syscall.rs
@@ -0,0 +1,19 @@
+use super::weak;
+
+pub(crate) macro syscall {
+    (
+        fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;
+    ) => (
+        unsafe fn $name($($param: $t),*) -> $ret {
+            weak!(fn $name($($param: $t),*) -> $ret;);
+
+            // Use a weak symbol from libc when possible, allowing `LD_PRELOAD`
+            // interposition, but if it's not found just use a raw syscall.
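// (Editorial note, not part of the patch: a typical beneficiary of this
// fallback is `getrandom`, which very old glibc versions do not export. The
// `${concat(SYS_, $name)}` metavariable expression below derives the `SYS_*`
// syscall number from the function name at macro-expansion time.)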
+ if let Some(fun) = $name.get() { + unsafe { fun($($param),*) } + } else { + unsafe { libc::syscall(libc::${concat(SYS_, $name)}, $($param),*) as $ret } + } + } + ) +} diff --git a/std/src/sys/pal/unix/weak/tests.rs b/std/src/sys/pal/unix/weak/tests.rs index d807ba64e3577..90d52095694a5 100644 --- a/std/src/sys/pal/unix/weak/tests.rs +++ b/std/src/sys/pal/unix/weak/tests.rs @@ -1,30 +1,24 @@ -use super::*; +// This file is included by both implementations of `weak!`. +use super::weak; +use crate::ffi::{CStr, c_char}; #[test] -fn dlsym_existing() { +fn weak_existing() { const TEST_STRING: &'static CStr = c"Ferris!"; // Try to find a symbol that definitely exists. - dlsym! { + weak! { fn strlen(cs: *const c_char) -> usize; } - dlsym! { - #[link_name = "strlen"] - fn custom_name(cs: *const c_char) -> usize; - } - let strlen = strlen.get().unwrap(); assert_eq!(unsafe { strlen(TEST_STRING.as_ptr()) }, TEST_STRING.count_bytes()); - - let custom_name = custom_name.get().unwrap(); - assert_eq!(unsafe { custom_name(TEST_STRING.as_ptr()) }, TEST_STRING.count_bytes()); } #[test] -fn dlsym_missing() { +fn weak_missing() { // Try to find a symbol that definitely does not exist. - dlsym! { + weak! { fn test_symbol_that_does_not_exist() -> i32; } diff --git a/std/src/sys/pal/unix/weak/weak_linkage.rs b/std/src/sys/pal/unix/weak/weak_linkage.rs new file mode 100644 index 0000000000000..3963f1d89be7c --- /dev/null +++ b/std/src/sys/pal/unix/weak/weak_linkage.rs @@ -0,0 +1,32 @@ +#[cfg(test)] +#[path = "./tests.rs"] +mod tests; + +pub(crate) macro weak { + (fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => ( + let ref $name: ExternWeak $ret> = { + unsafe extern "C" { + #[linkage = "extern_weak"] + static $name: Option $ret>; + } + #[allow(unused_unsafe)] + ExternWeak::new(unsafe { $name }) + }; + ) +} + +pub(crate) struct ExternWeak { + weak_ptr: Option, +} + +impl ExternWeak { + #[inline] + pub fn new(weak_ptr: Option) -> Self { + ExternWeak { weak_ptr } + } + + #[inline] + pub fn get(&self) -> Option { + self.weak_ptr + } +} From 19e4096d947d6acf94d2574f54bb472f24fffa44 Mon Sep 17 00:00:00 2001 From: yukang Date: Wed, 15 Oct 2025 08:02:32 +0800 Subject: [PATCH 092/358] Fix compiling error for redox etc --- std/src/sys/fs/unix.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/std/src/sys/fs/unix.rs b/std/src/sys/fs/unix.rs index 51849a31f61b5..d9a7fcb0e2d39 100644 --- a/std/src/sys/fs/unix.rs +++ b/std/src/sys/fs/unix.rs @@ -2113,10 +2113,10 @@ fn open_from(from: &Path) -> io::Result<(crate::fs::File, crate::fs::Metadata)> Ok((reader, metadata)) } -fn set_times_impl(p: &CStr, times: FileTimes, flags: c_int) -> io::Result<()> { +fn set_times_impl(p: &CStr, times: FileTimes, follow_symlinks: bool) -> io::Result<()> { cfg_select! 
{ any(target_os = "redox", target_os = "espidf", target_os = "horizon", target_os = "nuttx") => { - let _ = (p, times, flags); + let _ = (p, times, follow_symlinks); Err(io::const_error!( io::ErrorKind::Unsupported, "setting file times not supported", @@ -2124,12 +2124,11 @@ fn set_times_impl(p: &CStr, times: FileTimes, flags: c_int) -> io::Result<()> { } target_vendor = "apple" => { // Apple platforms use setattrlist which supports setting times on symlinks - //let (attrlist, buf, num_times) = set_attrlist_with_times(×)?; let ta = TimesAttrlist::from_times(×)?; - let options = if flags == libc::AT_SYMLINK_NOFOLLOW { - libc::FSOPT_NOFOLLOW - } else { + let options = if follow_symlinks { 0 + } else { + libc::FSOPT_NOFOLLOW }; cvt(unsafe { libc::setattrlist( @@ -2143,6 +2142,7 @@ fn set_times_impl(p: &CStr, times: FileTimes, flags: c_int) -> io::Result<()> { } target_os = "android" => { let times = [file_time_to_timespec(times.accessed)?, file_time_to_timespec(times.modified)?]; + let flags = if follow_symlinks { 0 } else { libc::AT_SYMLINK_NOFOLLOW }; // utimensat requires Android API level 19 cvt(unsafe { weak!( @@ -2159,6 +2159,7 @@ fn set_times_impl(p: &CStr, times: FileTimes, flags: c_int) -> io::Result<()> { Ok(()) } _ => { + let flags = if follow_symlinks { 0 } else { libc::AT_SYMLINK_NOFOLLOW }; #[cfg(all(target_os = "linux", target_env = "gnu", target_pointer_width = "32", not(target_arch = "riscv32")))] { use crate::sys::{time::__timespec64, weak::weak}; @@ -2185,13 +2186,12 @@ fn set_times_impl(p: &CStr, times: FileTimes, flags: c_int) -> io::Result<()> { #[inline(always)] pub fn set_times(p: &CStr, times: FileTimes) -> io::Result<()> { - // flags = 0 means follow symlinks - set_times_impl(p, times, 0) + set_times_impl(p, times, true) } #[inline(always)] pub fn set_times_nofollow(p: &CStr, times: FileTimes) -> io::Result<()> { - set_times_impl(p, times, libc::AT_SYMLINK_NOFOLLOW) + set_times_impl(p, times, false) } #[cfg(target_os = "espidf")] From 08ccecac8dacfd902a5189b26da380ff668b3380 Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Tue, 14 Oct 2025 21:12:11 -0400 Subject: [PATCH 093/358] Clarify that UB will occur, not can/may in GlobalAlloc docs --- core/src/alloc/global.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/alloc/global.rs b/core/src/alloc/global.rs index 5bf6f143b4f82..e2413b619f9fa 100644 --- a/core/src/alloc/global.rs +++ b/core/src/alloc/global.rs @@ -124,7 +124,7 @@ pub unsafe trait GlobalAlloc { /// /// # Safety /// - /// `layout` must have non-zero size. Attempting to allocate for a zero-sized `layout` may + /// `layout` must have non-zero size. Attempting to allocate for a zero-sized `layout` will /// result in undefined behavior. /// /// (Extension subtraits might provide more specific bounds on @@ -163,7 +163,7 @@ pub unsafe trait GlobalAlloc { /// * `layout` is the same layout that was used to allocate that block of /// memory. /// - /// Otherwise undefined behavior can result. + /// Otherwise the behavior is undefined. #[stable(feature = "global_alloc", since = "1.28.0")] unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout); @@ -173,7 +173,7 @@ pub unsafe trait GlobalAlloc { /// # Safety /// /// The caller has to ensure that `layout` has non-zero size. Like `alloc` - /// zero sized `layout` can result in undefined behavior. + /// zero sized `layout` will result in undefined behavior. /// However the allocated block of memory is guaranteed to be initialized. 
/// /// # Errors @@ -234,7 +234,7 @@ pub unsafe trait GlobalAlloc { /// does not overflow `isize` (i.e., the rounded value must be less than or /// equal to `isize::MAX`). /// - /// If these are not followed, undefined behavior can result. + /// If these are not followed, the behavior is undefined. /// /// (Extension subtraits might provide more specific bounds on /// behavior, e.g., guarantee a sentinel address or a null pointer From 43f708ca7ad705145a16e7fca67c5c6157b7f1b1 Mon Sep 17 00:00:00 2001 From: ltdk Date: Tue, 14 Oct 2025 20:39:39 -0400 Subject: [PATCH 094/358] const mem::drop --- core/src/mem/mod.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/core/src/mem/mod.rs b/core/src/mem/mod.rs index c484551187cc3..a537269284773 100644 --- a/core/src/mem/mod.rs +++ b/core/src/mem/mod.rs @@ -6,7 +6,7 @@ #![stable(feature = "rust1", since = "1.0.0")] use crate::alloc::Layout; -use crate::marker::DiscriminantKind; +use crate::marker::{Destruct, DiscriminantKind}; use crate::panic::const_assert; use crate::{clone, cmp, fmt, hash, intrinsics, ptr}; @@ -958,8 +958,13 @@ pub const fn replace(dest: &mut T, src: T) -> T { /// [`RefCell`]: crate::cell::RefCell #[inline] #[stable(feature = "rust1", since = "1.0.0")] +#[rustc_const_unstable(feature = "const_destruct", issue = "133214")] #[rustc_diagnostic_item = "mem_drop"] -pub fn drop(_x: T) {} +pub const fn drop(_x: T) +where + T: [const] Destruct, +{ +} /// Bitwise-copies a value. /// From 8115ebaef1c8e169e8fbf965e6d7f1c363fe2b86 Mon Sep 17 00:00:00 2001 From: David Wood Date: Wed, 15 Oct 2025 09:40:32 +0100 Subject: [PATCH 095/358] core: relax supertrait of `Destruct` --- core/src/marker.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/marker.rs b/core/src/marker.rs index 4b767d8d62218..5fd0611a18434 100644 --- a/core/src/marker.rs +++ b/core/src/marker.rs @@ -1057,7 +1057,7 @@ marker_impls! { #[rustc_on_unimplemented(message = "can't drop `{Self}`", append_const_msg)] #[rustc_deny_explicit_impl] #[rustc_do_not_implement_via_object] -pub const trait Destruct {} +pub const trait Destruct: PointeeSized {} /// A marker for tuple types. /// From d536344228c3bb226fef399c62f1d123e6478e15 Mon Sep 17 00:00:00 2001 From: h3nryc0ding Date: Wed, 15 Oct 2025 18:48:25 +0200 Subject: [PATCH 096/358] remove duplicate inline macro --- alloc/src/sync.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/alloc/src/sync.rs b/alloc/src/sync.rs index 5927d03646928..c78f2c8a47e00 100644 --- a/alloc/src/sync.rs +++ b/alloc/src/sync.rs @@ -886,7 +886,6 @@ impl Arc { /// let five = Arc::try_new_in(5, System)?; /// # Ok::<(), std::alloc::AllocError>(()) /// ``` - #[inline] #[unstable(feature = "allocator_api", issue = "32838")] #[inline] pub fn try_new_in(data: T, alloc: A) -> Result, AllocError> { From 1bcea0ada7c1d740196bd0064fcd7b1f06ce5aa3 Mon Sep 17 00:00:00 2001 From: timvisee Date: Thu, 16 Oct 2025 10:52:51 +0200 Subject: [PATCH 097/358] is_ascii on an empty string or slice returns true --- core/src/slice/ascii.rs | 2 ++ core/src/str/mod.rs | 2 ++ std/src/ffi/os_str.rs | 2 ++ 3 files changed, 6 insertions(+) diff --git a/core/src/slice/ascii.rs b/core/src/slice/ascii.rs index e17a2e03d2dc4..42916558b5fc4 100644 --- a/core/src/slice/ascii.rs +++ b/core/src/slice/ascii.rs @@ -9,6 +9,8 @@ use crate::{ascii, iter, ops}; impl [u8] { /// Checks if all bytes in this slice are within the ASCII range. + /// + /// An empty slice returns `true`. 
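// (Editorial illustration, not part of the patch: the empty case falls out of
// the vacuous truth of `Iterator::all`, which these checks are equivalent to.)
//
// assert!(b"".is_ascii()); // empty byte slice
// assert!("".is_ascii());  // empty string slice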
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] #[rustc_const_stable(feature = "const_slice_is_ascii", since = "1.74.0")] #[must_use] diff --git a/core/src/str/mod.rs b/core/src/str/mod.rs index 3a5efa7d83511..82019b9b3afe5 100644 --- a/core/src/str/mod.rs +++ b/core/src/str/mod.rs @@ -2704,6 +2704,8 @@ impl str { /// Checks if all characters in this string are within the ASCII range. /// + /// An empty string returns `true`. + /// /// # Examples /// /// ``` diff --git a/std/src/ffi/os_str.rs b/std/src/ffi/os_str.rs index 6c098034eea3b..09bd911aa769a 100644 --- a/std/src/ffi/os_str.rs +++ b/std/src/ffi/os_str.rs @@ -1215,6 +1215,8 @@ impl OsStr { /// Checks if all characters in this string are within the ASCII range. /// + /// An empty string returns `true`. + /// /// # Examples /// /// ``` From fcf542587855c3cf05034db60b72b2db74752768 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 11 Oct 2025 14:45:54 +0200 Subject: [PATCH 098/358] std-detect: improve detect macro docs document that the detect macros expand to `true` when the feature is statically enabled --- std_detect/src/detect/arch/aarch64.rs | 9 +++++++-- std_detect/src/detect/arch/arm.rs | 5 ++++- std_detect/src/detect/arch/loongarch.rs | 6 +++++- std_detect/src/detect/arch/mips.rs | 5 ++++- std_detect/src/detect/arch/mips64.rs | 5 ++++- std_detect/src/detect/arch/powerpc.rs | 5 ++++- std_detect/src/detect/arch/powerpc64.rs | 5 ++++- std_detect/src/detect/arch/riscv.rs | 6 ++++-- std_detect/src/detect/arch/s390x.rs | 5 ++++- std_detect/src/detect/arch/x86.rs | 11 +++++------ 10 files changed, 45 insertions(+), 17 deletions(-) diff --git a/std_detect/src/detect/arch/aarch64.rs b/std_detect/src/detect/arch/aarch64.rs index 13570a25c1cfe..5e85e96374eda 100644 --- a/std_detect/src/detect/arch/aarch64.rs +++ b/std_detect/src/detect/arch/aarch64.rs @@ -5,13 +5,18 @@ features! { @CFG: any(target_arch = "aarch64", target_arch = "arm64ec"); @MACRO_NAME: is_aarch64_feature_detected; @MACRO_ATTRS: - /// This macro tests, at runtime, whether an `aarch64` feature is enabled on aarch64 platforms. - /// Currently most features are only supported on linux-based platforms. + /// Check for the presence of a CPU feature at runtime. + /// + /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) + /// the macro expands to `true`. /// /// This macro takes one argument which is a string literal of the feature being tested for. /// The feature names are mostly taken from their FEAT_* definitions in the [ARM Architecture /// Reference Manual][docs]. /// + /// Currently most features are only supported on linux-based platforms: on other platforms the + /// runtime check will always return `false`. + /// /// ## Supported arguments /// /// * `"aes"` - FEAT_AES & FEAT_PMULL diff --git a/std_detect/src/detect/arch/arm.rs b/std_detect/src/detect/arch/arm.rs index c3c8883ce3153..75b8ca9a1e880 100644 --- a/std_detect/src/detect/arch/arm.rs +++ b/std_detect/src/detect/arch/arm.rs @@ -5,7 +5,10 @@ features! { @CFG: target_arch = "arm"; @MACRO_NAME: is_arm_feature_detected; @MACRO_ATTRS: - /// Checks if `arm` feature is enabled. + /// Check for the presence of a CPU feature at runtime. + /// + /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) + /// the macro expands to `true`. 
#[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] @NO_RUNTIME_DETECTION: "v7"; @NO_RUNTIME_DETECTION: "vfp2"; diff --git a/std_detect/src/detect/arch/loongarch.rs b/std_detect/src/detect/arch/loongarch.rs index d5a442fbbb8a4..6299627738111 100644 --- a/std_detect/src/detect/arch/loongarch.rs +++ b/std_detect/src/detect/arch/loongarch.rs @@ -5,7 +5,11 @@ features! { @CFG: any(target_arch = "loongarch32", target_arch = "loongarch64"); @MACRO_NAME: is_loongarch_feature_detected; @MACRO_ATTRS: - /// Checks if `loongarch` feature is enabled. + /// Check for the presence of a CPU feature at runtime. + /// + /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) + /// the macro expands to `true`. + /// /// Supported arguments are: /// /// * `"32s"` diff --git a/std_detect/src/detect/arch/mips.rs b/std_detect/src/detect/arch/mips.rs index e185fdfcaac6c..9e1960eb96da5 100644 --- a/std_detect/src/detect/arch/mips.rs +++ b/std_detect/src/detect/arch/mips.rs @@ -5,7 +5,10 @@ features! { @CFG: target_arch = "mips"; @MACRO_NAME: is_mips_feature_detected; @MACRO_ATTRS: - /// Checks if `mips` feature is enabled. + /// Check for the presence of a CPU feature at runtime. + /// + /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) + /// the macro expands to `true`. #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] @FEATURE: #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] msa: "msa"; /// MIPS SIMD Architecture (MSA) diff --git a/std_detect/src/detect/arch/mips64.rs b/std_detect/src/detect/arch/mips64.rs index 69fe4869d30eb..2bb44ba6e2b39 100644 --- a/std_detect/src/detect/arch/mips64.rs +++ b/std_detect/src/detect/arch/mips64.rs @@ -5,7 +5,10 @@ features! { @CFG: target_arch = "mips64"; @MACRO_NAME: is_mips64_feature_detected; @MACRO_ATTRS: - /// Checks if `mips64` feature is enabled. + /// Check for the presence of a CPU feature at runtime. + /// + /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) + /// the macro expands to `true`. #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] @FEATURE: #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] msa: "msa"; /// MIPS SIMD Architecture (MSA) diff --git a/std_detect/src/detect/arch/powerpc.rs b/std_detect/src/detect/arch/powerpc.rs index c390993a48a69..be2db0b81c2fc 100644 --- a/std_detect/src/detect/arch/powerpc.rs +++ b/std_detect/src/detect/arch/powerpc.rs @@ -5,7 +5,10 @@ features! { @CFG: target_arch = "powerpc"; @MACRO_NAME: is_powerpc_feature_detected; @MACRO_ATTRS: - /// Checks if `powerpc` feature is enabled. + /// Check for the presence of a CPU feature at runtime. + /// + /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) + /// the macro expands to `true`. #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] altivec: "altivec"; /// Altivec diff --git a/std_detect/src/detect/arch/powerpc64.rs b/std_detect/src/detect/arch/powerpc64.rs index cf05baa6f799e..98e8d5f32b75c 100644 --- a/std_detect/src/detect/arch/powerpc64.rs +++ b/std_detect/src/detect/arch/powerpc64.rs @@ -5,7 +5,10 @@ features! { @CFG: target_arch = "powerpc64"; @MACRO_NAME: is_powerpc64_feature_detected; @MACRO_ATTRS: - /// Checks if `powerpc` feature is enabled. 
+ /// Check for the presence of a CPU feature at runtime. + /// + /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) + /// the macro expands to `true`. #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] altivec: "altivec"; /// Altivec diff --git a/std_detect/src/detect/arch/riscv.rs b/std_detect/src/detect/arch/riscv.rs index 1e57d09edb143..846d7f10d68fc 100644 --- a/std_detect/src/detect/arch/riscv.rs +++ b/std_detect/src/detect/arch/riscv.rs @@ -5,8 +5,10 @@ features! { @CFG: any(target_arch = "riscv32", target_arch = "riscv64"); @MACRO_NAME: is_riscv_feature_detected; @MACRO_ATTRS: - /// A macro to test at *runtime* whether instruction sets are available on - /// RISC-V platforms. + /// Check for the presence of a CPU feature at runtime. + /// + /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) + /// the macro expands to `true`. /// /// RISC-V standard defined the base sets and the extension sets. /// The base sets are RV32I, RV64I, RV32E or RV128I. Any RISC-V platform diff --git a/std_detect/src/detect/arch/s390x.rs b/std_detect/src/detect/arch/s390x.rs index 4c20d011680bc..d59fbc7de3bd6 100644 --- a/std_detect/src/detect/arch/s390x.rs +++ b/std_detect/src/detect/arch/s390x.rs @@ -5,7 +5,10 @@ features! { @CFG: target_arch = "s390x"; @MACRO_NAME: is_s390x_feature_detected; @MACRO_ATTRS: - /// Checks if `s390x` feature is enabled. + /// Check for the presence of a CPU feature at runtime. + /// + /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) + /// the macro expands to `true`. #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] concurrent_functions: "concurrent-functions"; /// s390x concurrent-functions facility diff --git a/std_detect/src/detect/arch/x86.rs b/std_detect/src/detect/arch/x86.rs index bd749b88f566d..21d8b5609d961 100644 --- a/std_detect/src/detect/arch/x86.rs +++ b/std_detect/src/detect/arch/x86.rs @@ -20,13 +20,12 @@ features! { @CFG: any(target_arch = "x86", target_arch = "x86_64"); @MACRO_NAME: is_x86_feature_detected; @MACRO_ATTRS: - /// A macro to test at *runtime* whether a CPU feature is available on - /// x86/x86-64 platforms. + /// Check for the presence of a CPU feature at runtime. /// - /// This macro is provided in the standard library and will detect at runtime - /// whether the specified CPU feature is detected. This does **not** resolve at - /// compile time unless the specified feature is already enabled for the entire - /// crate. Runtime detection currently relies mostly on the `cpuid` instruction. + /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) + /// the macro expands to `true`. + /// + /// Runtime detection currently relies mostly on the `cpuid` instruction. /// /// This macro only takes one argument which is a string literal of the feature /// being tested for. 
The feature names supported are the lowercase versions of From afd64ba7721ddb9aea3545cf914508b4d208cdcc Mon Sep 17 00:00:00 2001 From: Evan Jones Date: Thu, 16 Oct 2025 09:07:57 -0400 Subject: [PATCH 099/358] add link to Builder (code review improvement) --- std/src/thread/mod.rs | 4 ++-- std/src/thread/scoped.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/std/src/thread/mod.rs b/std/src/thread/mod.rs index 78c85c0af6449..16313da8e178c 100644 --- a/std/src/thread/mod.rs +++ b/std/src/thread/mod.rs @@ -620,8 +620,8 @@ impl Builder { /// (It is the responsibility of the program to either eventually join threads it /// creates or detach them; otherwise, a resource leak will result.) /// -/// This function creates a thread with the default parameters. To specify the -/// new thread's stack size or the name, use [`Builder::spawn`]. +/// This function creates a thread with the default parameters of [`Builder`]. +/// To specify the new thread's stack size or the name, use [`Builder::spawn`]. /// /// As you can see in the signature of `spawn` there are two constraints on /// both the closure given to `spawn` and its return value, let's explain them: diff --git a/std/src/thread/scoped.rs b/std/src/thread/scoped.rs index 2368ce4988d89..75a5303fc321a 100644 --- a/std/src/thread/scoped.rs +++ b/std/src/thread/scoped.rs @@ -181,8 +181,8 @@ impl<'scope, 'env> Scope<'scope, 'env> { /// end of the scope. In that case, if the spawned thread panics, [`scope`] will /// panic after all threads are joined. /// - /// This function creates a thread with the default parameters. To specify the - /// new thread's stack size or the name, use [`Builder::spawn_scoped`]. + /// This function creates a thread with the default parameters of [`Builder`]. + /// To specify the new thread's stack size or the name, use [`Builder::spawn_scoped`]. /// /// # Panics /// From caba71997161fef66330d8f1f654c6b57922afc1 Mon Sep 17 00:00:00 2001 From: nxsaken Date: Thu, 16 Oct 2025 22:03:50 +0400 Subject: [PATCH 100/358] Return `Option` from `exact_div` and inherit overflow checks --- core/src/num/int_macros.rs | 30 ++++++++++++++++++------------ core/src/num/uint_macros.rs | 24 +++++++++++------------- coretests/tests/num/int_macros.rs | 9 +++++---- coretests/tests/num/uint_macros.rs | 5 +++-- 4 files changed, 37 insertions(+), 31 deletions(-) diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index c3460a6409069..4c2d2341174f1 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -1019,25 +1019,29 @@ macro_rules! int_impl { } } - /// Checked integer division without remainder. Computes `self / rhs`. + /// Integer division without remainder. Computes `self / rhs`, returning `None` if `self % rhs != 0`. /// /// # Panics /// - /// This function will panic if `rhs == 0`, the division results in overflow, - /// or `self % rhs != 0`. + /// This function will panic if `rhs == 0`. + /// + /// ## Overflow behavior + /// + /// On overflow, this function will panic if overflow checks are enabled (default in debug + /// mode) and wrap if overflow checks are disabled (default in release mode). 
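// (Editorial note, not part of the patch: for signed integers the only
// overflowing case is `MIN.exact_div(-1)`, because `-MIN` is not
// representable. With overflow checks disabled that case wraps exactly like
// the `/` operator does, so `i32::MIN.exact_div(-1)` yields `Some(i32::MIN)`
// in release mode and panics in debug mode.)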
 ///
 /// # Examples
 ///
 /// ```
 /// #![feature(exact_div)]
- #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(2), 32);")]
- #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(32), 2);")]
- #[doc = concat!("assert_eq!((", stringify!($SelfT), "::MIN + 1).exact_div(-1), ", stringify!($Max), ");")]
+ #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(2), Some(32));")]
+ #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(32), Some(2));")]
+ #[doc = concat!("assert_eq!((", stringify!($SelfT), "::MIN + 1).exact_div(-1), Some(", stringify!($Max), "));")]
+ #[doc = concat!("assert_eq!(65", stringify!($SelfT), ".exact_div(2), None);")]
 /// ```
- ///
 /// ```should_panic
 /// #![feature(exact_div)]
- #[doc = concat!("let _ = 65", stringify!($SelfT), ".exact_div(2);")]
+ #[doc = concat!("let _ = 64", stringify!($SelfT), ".exact_div(0);")]
 /// ```
 /// ```should_panic
 /// #![feature(exact_div)]
@@ -1050,10 +1054,12 @@
 #[must_use = "this returns the result of the operation, \
 without modifying the original"]
 #[inline]
- pub const fn exact_div(self, rhs: Self) -> Self {
- match self.checked_exact_div(rhs) {
- Some(x) => x,
- None => panic!("Failed to divide without remainder"),
+ #[rustc_inherit_overflow_checks]
+ pub const fn exact_div(self, rhs: Self) -> Option<Self> {
+ if self % rhs != 0 {
+ None
+ } else {
+ Some(self / rhs)
 }
 }
diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs
index b5b768cf677aa..1f05f883c93fa 100644
--- a/core/src/num/uint_macros.rs
+++ b/core/src/num/uint_macros.rs
@@ -1249,23 +1249,19 @@ macro_rules! uint_impl {
 }
 }

- /// Checked integer division without remainder. Computes `self / rhs`.
+ /// Integer division without remainder. Computes `self / rhs`, returning `None` if `self % rhs != 0`.
 ///
 /// # Panics
 ///
- /// This function will panic if `rhs == 0` or `self % rhs != 0`.
+ /// This function will panic if `rhs == 0`.
 ///
 /// # Examples
 ///
 /// ```
 /// #![feature(exact_div)]
- #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(2), 32);")]
- #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(32), 2);")]
- /// ```
- ///
- /// ```should_panic
- /// #![feature(exact_div)]
- #[doc = concat!("let _ = 65", stringify!($SelfT), ".exact_div(2);")]
+ #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(2), Some(32));")]
+ #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(32), Some(2));")]
+ #[doc = concat!("assert_eq!(65", stringify!($SelfT), ".exact_div(2), None);")]
 /// ```
 #[unstable(
 feature = "exact_div",
@@ -1274,10 +1270,12 @@
 #[must_use = "this returns the result of the operation, \
 without modifying the original"]
 #[inline]
- pub const fn exact_div(self, rhs: Self) -> Self {
- match self.checked_exact_div(rhs) {
- Some(x) => x,
- None => panic!("Failed to divide without remainder"),
+ #[rustc_inherit_overflow_checks]
+ pub const fn exact_div(self, rhs: Self) -> Option<Self> {
+ if self % rhs != 0 {
+ None
+ } else {
+ Some(self / rhs)
 }
 }
diff --git a/coretests/tests/num/int_macros.rs b/coretests/tests/num/int_macros.rs
index 1611a6466f5ab..e640b7853bd94 100644
--- a/coretests/tests/num/int_macros.rs
+++ b/coretests/tests/num/int_macros.rs
@@ -741,22 +741,23 @@ macro_rules!
int_module { fn test_exact_div() { // 42 / 6 assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND1, EXACT_DIV_SUCCESS_DIVISOR1), Some(EXACT_DIV_SUCCESS_QUOTIENT1)); - assert_eq_const_safe!($T: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND1, EXACT_DIV_SUCCESS_DIVISOR1), EXACT_DIV_SUCCESS_QUOTIENT1); + assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND1, EXACT_DIV_SUCCESS_DIVISOR1), Some(EXACT_DIV_SUCCESS_QUOTIENT1)); // 18 / 3 assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND2, EXACT_DIV_SUCCESS_DIVISOR2), Some(EXACT_DIV_SUCCESS_QUOTIENT2)); - assert_eq_const_safe!($T: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND2, EXACT_DIV_SUCCESS_DIVISOR2), EXACT_DIV_SUCCESS_QUOTIENT2); + assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND2, EXACT_DIV_SUCCESS_DIVISOR2), Some(EXACT_DIV_SUCCESS_QUOTIENT2)); // -91 / 13 assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND3, EXACT_DIV_SUCCESS_DIVISOR3), Some(EXACT_DIV_SUCCESS_QUOTIENT3)); - assert_eq_const_safe!($T: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND3, EXACT_DIV_SUCCESS_DIVISOR3), EXACT_DIV_SUCCESS_QUOTIENT3); + assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND3, EXACT_DIV_SUCCESS_DIVISOR3), Some(EXACT_DIV_SUCCESS_QUOTIENT3)); // -57 / -3 assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND4, EXACT_DIV_SUCCESS_DIVISOR4), Some(EXACT_DIV_SUCCESS_QUOTIENT4)); - assert_eq_const_safe!($T: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND4, EXACT_DIV_SUCCESS_DIVISOR4), EXACT_DIV_SUCCESS_QUOTIENT4); + assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND4, EXACT_DIV_SUCCESS_DIVISOR4), Some(EXACT_DIV_SUCCESS_QUOTIENT4)); // failures assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(1, 2), None); + assert_eq_const_safe!(Option<$T>: <$T>::exact_div(1, 2), None); assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(<$T>::MIN, -1), None); assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(0, 0), None); } diff --git a/coretests/tests/num/uint_macros.rs b/coretests/tests/num/uint_macros.rs index 63be8a45b5cfd..c1cfc448f14f5 100644 --- a/coretests/tests/num/uint_macros.rs +++ b/coretests/tests/num/uint_macros.rs @@ -606,14 +606,15 @@ macro_rules! 
uint_module { fn test_exact_div() { // 42 / 6 assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND1, EXACT_DIV_SUCCESS_DIVISOR1), Some(EXACT_DIV_SUCCESS_QUOTIENT1)); - assert_eq_const_safe!($T: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND1, EXACT_DIV_SUCCESS_DIVISOR1), EXACT_DIV_SUCCESS_QUOTIENT1); + assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND1, EXACT_DIV_SUCCESS_DIVISOR1), Some(EXACT_DIV_SUCCESS_QUOTIENT1)); // 18 / 3 assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND2, EXACT_DIV_SUCCESS_DIVISOR2), Some(EXACT_DIV_SUCCESS_QUOTIENT2)); - assert_eq_const_safe!($T: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND2, EXACT_DIV_SUCCESS_DIVISOR2), EXACT_DIV_SUCCESS_QUOTIENT2); + assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND2, EXACT_DIV_SUCCESS_DIVISOR2), Some(EXACT_DIV_SUCCESS_QUOTIENT2)); // failures assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(1, 2), None); + assert_eq_const_safe!(Option<$T>: <$T>::exact_div(1, 2), None); assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(0, 0), None); } } From db5c6d4d3a74581e4030256396cb503297fb93bd Mon Sep 17 00:00:00 2001 From: nxsaken Date: Thu, 16 Oct 2025 22:41:45 +0400 Subject: [PATCH 101/358] Remove whitespace --- core/src/num/int_macros.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index 4c2d2341174f1..672e56a0e56d6 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -1024,7 +1024,7 @@ macro_rules! int_impl { /// # Panics /// /// This function will panic if `rhs == 0`. - /// + /// /// ## Overflow behavior /// /// On overflow, this function will panic if overflow checks are enabled (default in debug From a0ec1277738ede7fd086cfd17924bc1cb2e10963 Mon Sep 17 00:00:00 2001 From: ltdk Date: Thu, 16 Oct 2025 15:04:43 -0400 Subject: [PATCH 102/358] const Cell methods --- core/src/cell.rs | 21 +++++++++++++++++---- coretests/tests/lib.rs | 1 + coretests/tests/ptr.rs | 29 ++++++++++++++--------------- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/core/src/cell.rs b/core/src/cell.rs index aeac35e45a5d0..988c50795e299 100644 --- a/core/src/cell.rs +++ b/core/src/cell.rs @@ -252,7 +252,7 @@ use crate::cmp::Ordering; use crate::fmt::{self, Debug, Display}; -use crate::marker::{PhantomData, Unsize}; +use crate::marker::{Destruct, PhantomData, Unsize}; use crate::mem::{self, ManuallyDrop}; use crate::ops::{self, CoerceUnsized, Deref, DerefMut, DerefPure, DispatchFromDyn}; use crate::panic::const_panic; @@ -429,7 +429,11 @@ impl Cell { /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] - pub fn set(&self, val: T) { + #[rustc_const_unstable(feature = "const_cell_traits", issue = "147787")] + pub const fn set(&self, val: T) + where + T: [const] Destruct, + { self.replace(val); } @@ -561,7 +565,12 @@ impl Cell { /// ``` #[inline] #[stable(feature = "cell_update", since = "1.88.0")] - pub fn update(&self, f: impl FnOnce(T) -> T) { + #[rustc_const_unstable(feature = "const_cell_traits", issue = "147787")] + pub const fn update(&self, f: impl [const] FnOnce(T) -> T) + where + // FIXME(const-hack): `Copy` should imply `const Destruct` + T: [const] Destruct, + { let old = self.get(); self.set(f(old)); } @@ -654,7 +663,11 @@ impl Cell { /// assert_eq!(c.into_inner(), 0); /// ``` #[stable(feature = "move_cell", since = "1.17.0")] - pub fn take(&self) -> T { + #[rustc_const_unstable(feature = 
"const_cell_traits", issue = "147787")] + pub const fn take(&self) -> T + where + T: [const] Default, + { self.replace(Default::default()) } } diff --git a/coretests/tests/lib.rs b/coretests/tests/lib.rs index c2dc3a99ab109..5c2522acb1362 100644 --- a/coretests/tests/lib.rs +++ b/coretests/tests/lib.rs @@ -16,6 +16,7 @@ #![feature(char_internals)] #![feature(char_max_len)] #![feature(clone_to_uninit)] +#![feature(const_cell_traits)] #![feature(const_cmp)] #![feature(const_convert)] #![feature(const_destruct)] diff --git a/coretests/tests/ptr.rs b/coretests/tests/ptr.rs index e89f21271027a..7afbb1260f239 100644 --- a/coretests/tests/ptr.rs +++ b/coretests/tests/ptr.rs @@ -1049,38 +1049,37 @@ fn test_ptr_default() { #[test] fn test_const_drop_in_place() { const COUNTER: usize = { - let mut counter = 0; - let counter_ptr = &raw mut counter; + use core::cell::Cell; + + let counter = Cell::new(0); // only exists to make `Drop` indirect impl #[allow(dead_code)] - struct Test(Dropped); + struct Test<'a>(Dropped<'a>); - struct Dropped(*mut usize); - impl const Drop for Dropped { + struct Dropped<'a>(&'a Cell); + impl const Drop for Dropped<'_> { fn drop(&mut self) { - unsafe { - *self.0 += 1; - } + self.0.set(self.0.get() + 1); } } - let mut one = ManuallyDrop::new(Test(Dropped(counter_ptr))); - let mut two = ManuallyDrop::new(Test(Dropped(counter_ptr))); - let mut three = ManuallyDrop::new(Test(Dropped(counter_ptr))); - assert!(counter == 0); + let mut one = ManuallyDrop::new(Test(Dropped(&counter))); + let mut two = ManuallyDrop::new(Test(Dropped(&counter))); + let mut three = ManuallyDrop::new(Test(Dropped(&counter))); + assert!(counter.get() == 0); unsafe { ManuallyDrop::drop(&mut one); } - assert!(counter == 1); + assert!(counter.get() == 1); unsafe { ManuallyDrop::drop(&mut two); } - assert!(counter == 2); + assert!(counter.get() == 2); unsafe { ManuallyDrop::drop(&mut three); } - counter + counter.get() }; assert_eq!(COUNTER, 3); } From d985fa13fc9dd37142f7b28e561c6d565ac4ac9a Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Fri, 17 Oct 2025 10:57:39 -0400 Subject: [PATCH 103/358] rename `once::ExclusiveState` to `OnceExclusiveState` It is a bit confusing when reading code that uses this type since it is not immediately obvious that it is specific to `Once`. 
Signed-off-by: Connor Tsui --- std/src/sync/lazy_lock.rs | 34 +++++++++++++++++------------ std/src/sync/poison/once.rs | 8 ++++--- std/src/sys/sync/once/futex.rs | 18 +++++++-------- std/src/sys/sync/once/no_threads.rs | 18 +++++++-------- std/src/sys/sync/once/queue.rs | 18 +++++++-------- 5 files changed, 52 insertions(+), 44 deletions(-) diff --git a/std/src/sync/lazy_lock.rs b/std/src/sync/lazy_lock.rs index 3231125f7a13a..16081d43cd49f 100644 --- a/std/src/sync/lazy_lock.rs +++ b/std/src/sync/lazy_lock.rs @@ -1,4 +1,4 @@ -use super::poison::once::ExclusiveState; +use super::poison::once::OnceExclusiveState; use crate::cell::UnsafeCell; use crate::mem::ManuallyDrop; use crate::ops::{Deref, DerefMut}; @@ -140,14 +140,18 @@ impl T> LazyLock { pub fn into_inner(mut this: Self) -> Result { let state = this.once.state(); match state { - ExclusiveState::Poisoned => panic_poisoned(), + OnceExclusiveState::Poisoned => panic_poisoned(), state => { let this = ManuallyDrop::new(this); let data = unsafe { ptr::read(&this.data) }.into_inner(); match state { - ExclusiveState::Incomplete => Err(ManuallyDrop::into_inner(unsafe { data.f })), - ExclusiveState::Complete => Ok(ManuallyDrop::into_inner(unsafe { data.value })), - ExclusiveState::Poisoned => unreachable!(), + OnceExclusiveState::Incomplete => { + Err(ManuallyDrop::into_inner(unsafe { data.f })) + } + OnceExclusiveState::Complete => { + Ok(ManuallyDrop::into_inner(unsafe { data.value })) + } + OnceExclusiveState::Poisoned => unreachable!(), } } } @@ -189,7 +193,7 @@ impl T> LazyLock { impl Drop for PoisonOnPanic<'_, T, F> { #[inline] fn drop(&mut self) { - self.0.once.set_state(ExclusiveState::Poisoned); + self.0.once.set_state(OnceExclusiveState::Poisoned); } } @@ -200,7 +204,7 @@ impl T> LazyLock { let guard = PoisonOnPanic(this); let data = f(); guard.0.data.get_mut().value = ManuallyDrop::new(data); - guard.0.once.set_state(ExclusiveState::Complete); + guard.0.once.set_state(OnceExclusiveState::Complete); core::mem::forget(guard); // SAFETY: We put the value there above. unsafe { &mut this.data.get_mut().value } @@ -208,11 +212,11 @@ impl T> LazyLock { let state = this.once.state(); match state { - ExclusiveState::Poisoned => panic_poisoned(), + OnceExclusiveState::Poisoned => panic_poisoned(), // SAFETY: The `Once` states we completed the initialization. - ExclusiveState::Complete => unsafe { &mut this.data.get_mut().value }, + OnceExclusiveState::Complete => unsafe { &mut this.data.get_mut().value }, // SAFETY: The state is `Incomplete`. - ExclusiveState::Incomplete => unsafe { really_init_mut(this) }, + OnceExclusiveState::Incomplete => unsafe { really_init_mut(this) }, } } @@ -293,7 +297,7 @@ impl LazyLock { match state { // SAFETY: // The closure has been run successfully, so `value` has been initialized. 
- ExclusiveState::Complete => Some(unsafe { &mut this.data.get_mut().value }), + OnceExclusiveState::Complete => Some(unsafe { &mut this.data.get_mut().value }), _ => None, } } @@ -332,11 +336,13 @@ impl LazyLock { impl Drop for LazyLock { fn drop(&mut self) { match self.once.state() { - ExclusiveState::Incomplete => unsafe { ManuallyDrop::drop(&mut self.data.get_mut().f) }, - ExclusiveState::Complete => unsafe { + OnceExclusiveState::Incomplete => unsafe { + ManuallyDrop::drop(&mut self.data.get_mut().f) + }, + OnceExclusiveState::Complete => unsafe { ManuallyDrop::drop(&mut self.data.get_mut().value) }, - ExclusiveState::Poisoned => {} + OnceExclusiveState::Poisoned => {} } } } diff --git a/std/src/sync/poison/once.rs b/std/src/sync/poison/once.rs index faf2913c54730..12cc32f381d18 100644 --- a/std/src/sync/poison/once.rs +++ b/std/src/sync/poison/once.rs @@ -49,7 +49,9 @@ pub struct OnceState { pub(crate) inner: sys::OnceState, } -pub(crate) enum ExclusiveState { +/// Used for the internal implementation of `sys::sync::once` on different platforms and the +/// [`LazyLock`](crate::sync::LazyLock) implementation. +pub(crate) enum OnceExclusiveState { Incomplete, Poisoned, Complete, @@ -310,7 +312,7 @@ impl Once { /// be running, so the state must be either "incomplete", "poisoned" or /// "complete". #[inline] - pub(crate) fn state(&mut self) -> ExclusiveState { + pub(crate) fn state(&mut self) -> OnceExclusiveState { self.inner.state() } @@ -320,7 +322,7 @@ impl Once { /// be running, so the state must be either "incomplete", "poisoned" or /// "complete". #[inline] - pub(crate) fn set_state(&mut self, new_state: ExclusiveState) { + pub(crate) fn set_state(&mut self, new_state: OnceExclusiveState) { self.inner.set_state(new_state); } } diff --git a/std/src/sys/sync/once/futex.rs b/std/src/sys/sync/once/futex.rs index 407fdcebcf5cc..18f7f5d3d5f71 100644 --- a/std/src/sys/sync/once/futex.rs +++ b/std/src/sys/sync/once/futex.rs @@ -1,7 +1,7 @@ use crate::cell::Cell; use crate::sync as public; use crate::sync::atomic::Ordering::{Acquire, Relaxed, Release}; -use crate::sync::poison::once::ExclusiveState; +use crate::sync::poison::once::OnceExclusiveState; use crate::sys::futex::{Futex, Primitive, futex_wait, futex_wake_all}; // On some platforms, the OS is very nice and handles the waiter queue for us. 
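// (Editorial sketch, not part of the patch: the `state`/`set_state` pair in
// these implementations relies on exclusive access. Taking `&mut self` rules
// out any concurrent observer, so the atomic can be read and written through
// `get_mut()` as a plain integer with no ordering requirements. `peek` is an
// illustrative name.)
//
// fn peek(state: &mut std::sync::atomic::AtomicU32) -> u32 {
//     *state.get_mut() // exclusive access, no synchronization needed
// }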
@@ -83,21 +83,21 @@ impl Once { } #[inline] - pub(crate) fn state(&mut self) -> ExclusiveState { + pub(crate) fn state(&mut self) -> OnceExclusiveState { match *self.state_and_queued.get_mut() { - INCOMPLETE => ExclusiveState::Incomplete, - POISONED => ExclusiveState::Poisoned, - COMPLETE => ExclusiveState::Complete, + INCOMPLETE => OnceExclusiveState::Incomplete, + POISONED => OnceExclusiveState::Poisoned, + COMPLETE => OnceExclusiveState::Complete, _ => unreachable!("invalid Once state"), } } #[inline] - pub(crate) fn set_state(&mut self, new_state: ExclusiveState) { + pub(crate) fn set_state(&mut self, new_state: OnceExclusiveState) { *self.state_and_queued.get_mut() = match new_state { - ExclusiveState::Incomplete => INCOMPLETE, - ExclusiveState::Poisoned => POISONED, - ExclusiveState::Complete => COMPLETE, + OnceExclusiveState::Incomplete => INCOMPLETE, + OnceExclusiveState::Poisoned => POISONED, + OnceExclusiveState::Complete => COMPLETE, }; } diff --git a/std/src/sys/sync/once/no_threads.rs b/std/src/sys/sync/once/no_threads.rs index 2568059cfe3a8..7c4cd1a5715d8 100644 --- a/std/src/sys/sync/once/no_threads.rs +++ b/std/src/sys/sync/once/no_threads.rs @@ -1,6 +1,6 @@ use crate::cell::Cell; use crate::sync as public; -use crate::sync::poison::once::ExclusiveState; +use crate::sync::poison::once::OnceExclusiveState; pub struct Once { state: Cell, @@ -45,21 +45,21 @@ impl Once { } #[inline] - pub(crate) fn state(&mut self) -> ExclusiveState { + pub(crate) fn state(&mut self) -> OnceExclusiveState { match self.state.get() { - State::Incomplete => ExclusiveState::Incomplete, - State::Poisoned => ExclusiveState::Poisoned, - State::Complete => ExclusiveState::Complete, + State::Incomplete => OnceExclusiveState::Incomplete, + State::Poisoned => OnceExclusiveState::Poisoned, + State::Complete => OnceExclusiveState::Complete, _ => unreachable!("invalid Once state"), } } #[inline] - pub(crate) fn set_state(&mut self, new_state: ExclusiveState) { + pub(crate) fn set_state(&mut self, new_state: OnceExclusiveState) { self.state.set(match new_state { - ExclusiveState::Incomplete => State::Incomplete, - ExclusiveState::Poisoned => State::Poisoned, - ExclusiveState::Complete => State::Complete, + OnceExclusiveState::Incomplete => State::Incomplete, + OnceExclusiveState::Poisoned => State::Poisoned, + OnceExclusiveState::Complete => State::Complete, }); } diff --git a/std/src/sys/sync/once/queue.rs b/std/src/sys/sync/once/queue.rs index 17d99cdb38595..d2663f7771de8 100644 --- a/std/src/sys/sync/once/queue.rs +++ b/std/src/sys/sync/once/queue.rs @@ -58,7 +58,7 @@ use crate::cell::Cell; use crate::sync::atomic::Ordering::{AcqRel, Acquire, Release}; use crate::sync::atomic::{Atomic, AtomicBool, AtomicPtr}; -use crate::sync::poison::once::ExclusiveState; +use crate::sync::poison::once::OnceExclusiveState; use crate::thread::{self, Thread}; use crate::{fmt, ptr, sync as public}; @@ -131,21 +131,21 @@ impl Once { } #[inline] - pub(crate) fn state(&mut self) -> ExclusiveState { + pub(crate) fn state(&mut self) -> OnceExclusiveState { match self.state_and_queue.get_mut().addr() { - INCOMPLETE => ExclusiveState::Incomplete, - POISONED => ExclusiveState::Poisoned, - COMPLETE => ExclusiveState::Complete, + INCOMPLETE => OnceExclusiveState::Incomplete, + POISONED => OnceExclusiveState::Poisoned, + COMPLETE => OnceExclusiveState::Complete, _ => unreachable!("invalid Once state"), } } #[inline] - pub(crate) fn set_state(&mut self, new_state: ExclusiveState) { + pub(crate) fn set_state(&mut self, new_state: 
OnceExclusiveState) { *self.state_and_queue.get_mut() = match new_state { - ExclusiveState::Incomplete => ptr::without_provenance_mut(INCOMPLETE), - ExclusiveState::Poisoned => ptr::without_provenance_mut(POISONED), - ExclusiveState::Complete => ptr::without_provenance_mut(COMPLETE), + OnceExclusiveState::Incomplete => ptr::without_provenance_mut(INCOMPLETE), + OnceExclusiveState::Poisoned => ptr::without_provenance_mut(POISONED), + OnceExclusiveState::Complete => ptr::without_provenance_mut(COMPLETE), }; } From dd77dc08de80b2029e3542c925bbf41b1e6e6e7d Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Fri, 17 Oct 2025 11:02:47 -0400 Subject: [PATCH 104/358] clean up some documentation Signed-off-by: Connor Tsui --- std/src/sync/mod.rs | 2 +- std/src/sync/poison.rs | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/std/src/sync/mod.rs b/std/src/sync/mod.rs index 97c04d07eaf1d..00aac71e2d1c1 100644 --- a/std/src/sync/mod.rs +++ b/std/src/sync/mod.rs @@ -142,7 +142,7 @@ //! most one thread at a time is able to access some data. //! //! - [`Once`]: Used for a thread-safe, one-time global initialization routine. -//! Mostly useful for implementing other types like `OnceLock`. +//! Mostly useful for implementing other types like [`OnceLock`]. //! //! - [`OnceLock`]: Used for thread-safe, one-time initialization of a //! variable, with potentially different initializers based on the caller. diff --git a/std/src/sync/poison.rs b/std/src/sync/poison.rs index 5517082033380..b4f18e755392f 100644 --- a/std/src/sync/poison.rs +++ b/std/src/sync/poison.rs @@ -13,8 +13,8 @@ //! the panics are recognized reliably or on a best-effort basis depend on the //! primitive. See [Overview](#overview) below. //! -//! For the alternative implementations that do not employ poisoning, -//! see [`std::sync::nonpoison`]. +//! The synchronization objects in this module have alternative implementations that do not employ +//! poisoning in the [`std::sync::nonpoison`] module. //! //! [`std::sync::nonpoison`]: crate::sync::nonpoison //! @@ -42,14 +42,6 @@ //! [`Mutex::lock()`] returns a [`LockResult`], providing a way to deal with //! the poisoned state. See [`Mutex`'s documentation](Mutex#poisoning) for more. //! -//! - [`Once`]: A thread-safe way to run a piece of code only once. -//! Mostly useful for implementing one-time global initialization. -//! -//! [`Once`] is reliably poisoned if the piece of code passed to -//! [`Once::call_once()`] or [`Once::call_once_force()`] panics. -//! When in poisoned state, subsequent calls to [`Once::call_once()`] will panic too. -//! [`Once::call_once_force()`] can be used to clear the poisoned state. -//! //! - [`RwLock`]: Provides a mutual exclusion mechanism which allows //! multiple readers at the same time, while allowing only one //! writer at a time. In some cases, this can be more efficient than @@ -59,6 +51,11 @@ //! Note, however, that an `RwLock` may only be poisoned if a panic occurs //! while it is locked exclusively (write mode). If a panic occurs in any reader, //! then the lock will not be poisoned. +//! +//! Note that the [`Once`] type also employs poisoning, but since it has non-poisoning `force` +//! methods available on it, there is no separate `nonpoison` and `poison` version. +//! +//! [`Once`]: crate::sync::Once // If we are not unwinding, `PoisonError` is uninhabited. 
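// (Editorial note, not part of the patch: with `-Cpanic=abort` a thread can
// never unwind while holding a lock, so a lock can never become poisoned. The
// code paths that handle `PoisonError` are then statically dead, which the
// `expect(unreachable_code)` attribute below acknowledges.)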
#![cfg_attr(not(panic = "unwind"), expect(unreachable_code))]

From 4fcf584ffc53885788d0041adad81210d0208b64 Mon Sep 17 00:00:00 2001
From: Connor Tsui 
Date: Fri, 17 Oct 2025 11:16:30 -0400
Subject: [PATCH 105/358] reorganize `library/std/src/sync/mod.rs` file

Moves things around to make a bit more sense (plus prepares moving `once` out
of `poison`).

Signed-off-by: Connor Tsui 
---
 std/src/sync/mod.rs | 59 ++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/std/src/sync/mod.rs b/std/src/sync/mod.rs
index 00aac71e2d1c1..7374f09efdcf6 100644
--- a/std/src/sync/mod.rs
+++ b/std/src/sync/mod.rs
@@ -181,7 +181,20 @@ pub use alloc_crate::sync::UniqueArc;
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use alloc_crate::sync::{Arc, Weak};
 
-// FIXME(sync_nonpoison,sync_poison_mod): remove all `#[doc(inline)]` once the modules are stabilized.
+#[unstable(feature = "mpmc_channel", issue = "126840")]
+pub mod mpmc;
+pub mod mpsc;
+
+// TODO: Make this `self::once::ONCE_INIT`.
+#[stable(feature = "rust1", since = "1.0.0")]
+#[doc(inline)]
+#[expect(deprecated)]
+pub use self::poison::ONCE_INIT;
+
+mod barrier;
+mod lazy_lock;
+mod once_lock;
+mod reentrant_lock;
 
 // These exist only in one flavor: no poisoning.
 #[stable(feature = "rust1", since = "1.0.0")]
@@ -193,48 +206,38 @@ pub use self::once_lock::OnceLock;
 #[unstable(feature = "reentrant_lock", issue = "121440")]
 pub use self::reentrant_lock::{ReentrantLock, ReentrantLockGuard};
 
-// These make sense and exist only with poisoning.
+// Note: in the future we will change the default version in `std::sync` to the non-poisoning
+// version over an edition.
+// See https://github.com/rust-lang/rust/issues/134645#issuecomment-3324577500 for more details.
+
+#[unstable(feature = "sync_nonpoison", issue = "134645")]
+pub mod nonpoison;
+#[unstable(feature = "sync_poison_mod", issue = "134646")]
+pub mod poison;
+
+// FIXME(sync_poison_mod): remove all `#[doc(inline)]` once the modules are stabilized.
+
+// These exist only with poisoning.
 #[stable(feature = "rust1", since = "1.0.0")]
 #[doc(inline)]
 pub use self::poison::{LockResult, PoisonError};
 
-// These (should) exist in both flavors: with and without poisoning.
-// FIXME(sync_nonpoison): implement nonpoison versions:
-// * Mutex (nonpoison_mutex)
-// * Condvar (nonpoison_condvar)
-// * Once (nonpoison_once)
-// * RwLock (nonpoison_rwlock)
+// These exist in both flavors: with and without poisoning.
 // The historical default is the version with poisoning. 
#[stable(feature = "rust1", since = "1.0.0")] #[doc(inline)] pub use self::poison::{ - Mutex, MutexGuard, TryLockError, TryLockResult, + TryLockError, TryLockResult, + Mutex, MutexGuard, + RwLock, RwLockReadGuard, RwLockWriteGuard, Condvar, Once, OnceState, - RwLock, RwLockReadGuard, RwLockWriteGuard, }; -#[stable(feature = "rust1", since = "1.0.0")] -#[doc(inline)] -#[expect(deprecated)] -pub use self::poison::ONCE_INIT; + #[unstable(feature = "mapped_lock_guards", issue = "117108")] #[doc(inline)] pub use self::poison::{MappedMutexGuard, MappedRwLockReadGuard, MappedRwLockWriteGuard}; -#[unstable(feature = "mpmc_channel", issue = "126840")] -pub mod mpmc; -pub mod mpsc; - -#[unstable(feature = "sync_nonpoison", issue = "134645")] -pub mod nonpoison; -#[unstable(feature = "sync_poison_mod", issue = "134646")] -pub mod poison; - -mod barrier; -mod lazy_lock; -mod once_lock; -mod reentrant_lock; - /// A type indicating whether a timed wait on a condition variable returned /// due to a time out or not. /// From c03cc2f5383efadef5da946d2980722b16ff304b Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Fri, 17 Oct 2025 11:35:47 -0400 Subject: [PATCH 106/358] move `once` module out of `poison` Since `Once` will not have a non-poisoning variant, we remove it from the `poison` module. Signed-off-by: Connor Tsui --- std/src/sync/lazy_lock.rs | 2 +- std/src/sync/mod.rs | 9 ++++++--- std/src/sync/{poison => }/once.rs | 0 std/src/sync/poison.rs | 6 ------ std/src/sys/sync/once/futex.rs | 2 +- std/src/sys/sync/once/no_threads.rs | 2 +- std/src/sys/sync/once/queue.rs | 2 +- 7 files changed, 10 insertions(+), 13 deletions(-) rename std/src/sync/{poison => }/once.rs (100%) diff --git a/std/src/sync/lazy_lock.rs b/std/src/sync/lazy_lock.rs index 16081d43cd49f..f1cae4b207c9a 100644 --- a/std/src/sync/lazy_lock.rs +++ b/std/src/sync/lazy_lock.rs @@ -1,4 +1,4 @@ -use super::poison::once::OnceExclusiveState; +use super::once::OnceExclusiveState; use crate::cell::UnsafeCell; use crate::mem::ManuallyDrop; use crate::ops::{Deref, DerefMut}; diff --git a/std/src/sync/mod.rs b/std/src/sync/mod.rs index 7374f09efdcf6..19b3040dcb279 100644 --- a/std/src/sync/mod.rs +++ b/std/src/sync/mod.rs @@ -185,11 +185,15 @@ pub use alloc_crate::sync::{Arc, Weak}; pub mod mpmc; pub mod mpsc; -// TODO: Make this `self::once::ONCE_INIT`. +pub(crate) mod once; // `pub(crate)` for the `sys::sync::once` implementations and `LazyLock`. 
+ +#[stable(feature = "rust1", since = "1.0.0")] +pub use self::once::{Once, OnceState}; + #[stable(feature = "rust1", since = "1.0.0")] #[doc(inline)] #[expect(deprecated)] -pub use self::poison::ONCE_INIT; +pub use self::once::ONCE_INIT; mod barrier; mod lazy_lock; @@ -231,7 +235,6 @@ pub use self::poison::{ Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard, Condvar, - Once, OnceState, }; #[unstable(feature = "mapped_lock_guards", issue = "117108")] diff --git a/std/src/sync/poison/once.rs b/std/src/sync/once.rs similarity index 100% rename from std/src/sync/poison/once.rs rename to std/src/sync/once.rs diff --git a/std/src/sync/poison.rs b/std/src/sync/poison.rs index b4f18e755392f..9f40c01546632 100644 --- a/std/src/sync/poison.rs +++ b/std/src/sync/poison.rs @@ -66,11 +66,6 @@ pub use self::condvar::Condvar; pub use self::mutex::MappedMutexGuard; #[stable(feature = "rust1", since = "1.0.0")] pub use self::mutex::{Mutex, MutexGuard}; -#[stable(feature = "rust1", since = "1.0.0")] -#[expect(deprecated)] -pub use self::once::ONCE_INIT; -#[stable(feature = "rust1", since = "1.0.0")] -pub use self::once::{Once, OnceState}; #[unstable(feature = "mapped_lock_guards", issue = "117108")] pub use self::rwlock::{MappedRwLockReadGuard, MappedRwLockWriteGuard}; #[stable(feature = "rust1", since = "1.0.0")] @@ -85,7 +80,6 @@ use crate::thread; mod condvar; #[stable(feature = "rust1", since = "1.0.0")] mod mutex; -pub(crate) mod once; mod rwlock; pub(crate) struct Flag { diff --git a/std/src/sys/sync/once/futex.rs b/std/src/sys/sync/once/futex.rs index 18f7f5d3d5f71..096f1d879ef26 100644 --- a/std/src/sys/sync/once/futex.rs +++ b/std/src/sys/sync/once/futex.rs @@ -1,7 +1,7 @@ use crate::cell::Cell; use crate::sync as public; use crate::sync::atomic::Ordering::{Acquire, Relaxed, Release}; -use crate::sync::poison::once::OnceExclusiveState; +use crate::sync::once::OnceExclusiveState; use crate::sys::futex::{Futex, Primitive, futex_wait, futex_wake_all}; // On some platforms, the OS is very nice and handles the waiter queue for us. 
diff --git a/std/src/sys/sync/once/no_threads.rs b/std/src/sys/sync/once/no_threads.rs
index 7c4cd1a5715d8..576bbf644cbed 100644
--- a/std/src/sys/sync/once/no_threads.rs
+++ b/std/src/sys/sync/once/no_threads.rs
@@ -1,6 +1,6 @@
 use crate::cell::Cell;
 use crate::sync as public;
-use crate::sync::poison::once::OnceExclusiveState;
+use crate::sync::once::OnceExclusiveState;
 
 pub struct Once {
     state: Cell<State>,
 }
diff --git a/std/src/sys/sync/once/queue.rs b/std/src/sys/sync/once/queue.rs
index d2663f7771de8..d7219a7361cff 100644
--- a/std/src/sys/sync/once/queue.rs
+++ b/std/src/sys/sync/once/queue.rs
@@ -58,7 +58,7 @@ use crate::cell::Cell;
 use crate::sync::atomic::Ordering::{AcqRel, Acquire, Release};
 use crate::sync::atomic::{Atomic, AtomicBool, AtomicPtr};
-use crate::sync::poison::once::OnceExclusiveState;
+use crate::sync::once::OnceExclusiveState;
 use crate::thread::{self, Thread};
 use crate::{fmt, ptr, sync as public};
 
From 1a36519584740cdbab7f12900579a3403e901be8 Mon Sep 17 00:00:00 2001
From: Marijn Schouten 
Date: Fri, 17 Oct 2025 15:22:54 +0000
Subject: [PATCH 107/358] btree: some cleanup with less unsafe

---
 alloc/src/collections/btree/node.rs | 33 ++++++++++++-----------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/alloc/src/collections/btree/node.rs b/alloc/src/collections/btree/node.rs
index a87259e7c58f2..84dd4b7e49def 100644
--- a/alloc/src/collections/btree/node.rs
+++ b/alloc/src/collections/btree/node.rs
@@ -33,6 +33,7 @@
 use core::marker::PhantomData;
 use core::mem::{self, MaybeUninit};
+use core::num::NonZero;
 use core::ptr::{self, NonNull};
 use core::slice::SliceIndex;
 
@@ -143,7 +144,7 @@ type BoxedNode<K, V> = NonNull<LeafNode<K, V>>;
 ///
 /// A reference to a node.
 ///
-/// This type has a number of parameters that controls how it acts:
+/// This type has a number of parameters that control how it acts:
 /// - `BorrowType`: A dummy type that describes the kind of borrow and carries a lifetime.
 ///    - When this is `Immut<'a>`, the `NodeRef` acts roughly like `&'a Node`.
 ///    - When this is `ValMut<'a>`, the `NodeRef` acts roughly like `&'a Node`
@@ -226,33 +227,27 @@ impl<K, V> NodeRef<marker::Owned, K, V, marker::Leaf> {
     fn from_new_leaf<A: Allocator + Clone>(leaf: Box<LeafNode<K, V>, A>) -> Self {
         // The allocator must be dropped, not leaked. See also `BTreeMap::alloc`.
-        let (leaf, _alloc) = Box::into_raw_with_allocator(leaf);
-        // SAFETY: the node was just allocated.
-        let node = unsafe { NonNull::new_unchecked(leaf) };
+        let (node, _alloc) = Box::into_non_null_with_allocator(leaf);
         NodeRef { height: 0, node, _marker: PhantomData }
     }
 }
 
 impl<K, V> NodeRef<marker::Owned, K, V, marker::Internal> {
+    /// Creates a new internal (height > 0) `NodeRef`
     fn new_internal<A: Allocator + Clone>(child: Root<K, V>, alloc: A) -> Self {
         let mut new_node = unsafe { InternalNode::new(alloc) };
         new_node.edges[0].write(child.node);
-        unsafe { NodeRef::from_new_internal(new_node, child.height + 1) }
+        NodeRef::from_new_internal(new_node, NonZero::new(child.height + 1).unwrap())
     }
 
-    /// # Safety
-    /// `height` must not be zero.
-    unsafe fn from_new_internal<A: Allocator + Clone>(
+    /// Creates a new internal (height > 0) `NodeRef` from an existing internal node
+    fn from_new_internal<A: Allocator + Clone>(
         internal: Box<InternalNode<K, V>, A>,
-        height: usize,
+        height: NonZero<usize>,
     ) -> Self {
-        debug_assert!(height > 0);
         // The allocator must be dropped, not leaked. See also `BTreeMap::alloc`.
-        let (internal, _alloc) = Box::into_raw_with_allocator(internal);
-        // SAFETY: the node was just allocated. 
-        let internal = unsafe { NonNull::new_unchecked(internal) };
-        let node = internal.cast();
-        let mut this = NodeRef { height, node, _marker: PhantomData };
+        let (node, _alloc) = Box::into_non_null_with_allocator(internal);
+        let mut this = NodeRef { height: height.into(), node: node.cast(), _marker: PhantomData };
         this.borrow_mut().correct_all_childrens_parent_links();
         this
     }
@@ -625,9 +620,8 @@ impl NodeRef {
         let top = self.node;
 
         // SAFETY: we asserted to be internal.
-        let internal_self = unsafe { self.borrow_mut().cast_to_internal_unchecked() };
-        // SAFETY: we borrowed `self` exclusively and its borrow type is exclusive.
-        let internal_node = unsafe { &mut *NodeRef::as_internal_ptr(&internal_self) };
+        let mut internal_self = unsafe { self.borrow_mut().cast_to_internal_unchecked() };
+        let internal_node = internal_self.as_internal_mut();
 
         // SAFETY: the first edge is always initialized.
         self.node = unsafe { internal_node.edges[0].assume_init_read() };
         self.height -= 1;
@@ -1305,7 +1299,8 @@ impl<'a, K: 'a, V: 'a> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::KV>
             &mut new_node.edges[..new_len + 1],
         );
 
-        let height = self.node.height;
+        // SAFETY: self is `marker::Internal`, so `self.node.height` is positive
+        let height = unsafe { NonZero::new_unchecked(self.node.height) };
         let right = NodeRef::from_new_internal(new_node, height);
 
         SplitResult { left: self.node, kv, right }

From d370fc48a0e22f1e0251c62136f9a34c80aebce9 Mon Sep 17 00:00:00 2001
From: Ralf Jung 
Date: Sat, 13 Sep 2025 16:58:40 +0200
Subject: [PATCH 108/358] Result/Option layout guarantee clarifications

---
 core/src/option.rs |  7 +++++--
 core/src/result.rs | 18 ++++++++++++------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/core/src/option.rs b/core/src/option.rs
index 430ee3470ac3f..47c554d75edb1 100644
--- a/core/src/option.rs
+++ b/core/src/option.rs
@@ -119,8 +119,11 @@
 //! # Representation
 //!
 //! Rust guarantees to optimize the following types `T` such that
-//! [`Option<T>`] has the same size, alignment, and [function call ABI] as `T`. In some
-//! of these cases, Rust further guarantees the following:
+//! [`Option<T>`] has the same size, alignment, and [function call ABI] as `T`.
+//! It is therefore sound to transmute `t: T` to `Option<T>` (which will produce `Some(t)`), and
+//! to transmute `Some(t): Option<T>` to `T` (which will produce `t`).
+//!
+//! In some of these cases, Rust further guarantees the following:
 //! - `transmute::<_, Option<T>>([0u8; size_of::<T>()])` is sound and produces
 //!   `Option::<T>::None`
 //! - `transmute::<_, [u8; size_of::<T>()]>(Option::<T>::None)` is sound and produces
diff --git a/core/src/result.rs b/core/src/result.rs
index c69762a728598..324f288594b19 100644
--- a/core/src/result.rs
+++ b/core/src/result.rs
@@ -230,24 +230,30 @@
 //!
 //! # Representation
 //!
-//! In some cases, [`Result<T, E>`] will gain the same size, alignment, and ABI
-//! guarantees as [`Option<T>`] has. One of either the `T` or `E` type must be a
-//! type that qualifies for the `Option` [representation guarantees][opt-rep],
-//! and the *other* type must meet all of the following conditions:
+//! In some cases, [`Result<T, E>`] comes with size, alignment, and ABI guarantees.
+//! Specifically, one of either the `T` or `E` type must be a type that qualifies for the `Option`
+//! [representation guarantees][opt-rep] (let's call that type `I`), and the *other* type must meet
+//! all of the following conditions:
 //! * Is a zero-sized type with alignment 1 (a "1-ZST").
 //! * Has no fields.
 //! * Does not have the `#[non_exhaustive]` attribute.
 //!
+//! 
If that is the case, then `Result<T, E>` has the same size, alignment, and [function call ABI]
+//! as `I` (and therefore, as `Option<I>`). If `I` is `T`, it is therefore sound to transmute `t: I`
+//! to `Result<T, E>` (which will produce `Ok(t)`), and to transmute `Ok(t): Result<T, E>` to `I`
+//! (which will produce `t`). If `I` is `E`, the same applies with `Ok` replaced by `Err`.
+//!
 //! For example, `NonZeroI32` qualifies for the `Option` representation
 //! guarantees, and `()` is a zero-sized type with alignment 1, no fields, and
 //! it isn't `non_exhaustive`. This means that both `Result<NonZeroI32, ()>` and
-//! `Result<(), NonZeroI32>` have the same size, alignment, and ABI guarantees
-//! as `Option<NonZeroI32>`. The only difference is the implied semantics:
+//! `Result<(), NonZeroI32>` have the same size, alignment, and ABI
+//! as `NonZeroI32` (and `Option<NonZeroI32>`). The only difference is the implied semantics:
 //! * `Option<NonZeroI32>` is "a non-zero i32 might be present"
 //! * `Result<NonZeroI32, ()>` is "a non-zero i32 success result, if any"
 //! * `Result<(), NonZeroI32>` is "a non-zero i32 error result, if any"
 //!
 //! [opt-rep]: ../option/index.html#representation "Option Representation"
+//! [function call ABI]: ../primitive.fn.html#abi-compatibility
 //!
 //! # Method overview
 //!

From 80966ffa241f236854a16c5f98b35b0fa124bcbc Mon Sep 17 00:00:00 2001
From: Ralf Jung 
Date: Sat, 13 Sep 2025 17:23:29 +0200
Subject: [PATCH 109/358] clarify 'no fields'

---
 core/src/result.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/result.rs b/core/src/result.rs
index 324f288594b19..86602b6200040 100644
--- a/core/src/result.rs
+++ b/core/src/result.rs
@@ -235,7 +235,7 @@
 //! [representation guarantees][opt-rep] (let's call that type `I`), and the *other* type must meet
 //! all of the following conditions:
 //! * Is a zero-sized type with alignment 1 (a "1-ZST").
-//! * Has no fields.
+//! * Is either a struct/tuple with no fields, or an enum with no variants.
 //! * Does not have the `#[non_exhaustive]` attribute.
 //!
 //! If that is the case, then `Result<T, E>` has the same size, alignment, and [function call ABI]
From 442c119806bd49624583c3d78efe0a34217da469 Mon Sep 17 00:00:00 2001
From: Ralf Jung 
Date: Sat, 13 Sep 2025 21:05:30 +0200
Subject: [PATCH 110/358] have Result docs match ABI docs

---
 core/src/result.rs | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/core/src/result.rs b/core/src/result.rs
index 86602b6200040..39689e657a580 100644
--- a/core/src/result.rs
+++ b/core/src/result.rs
@@ -232,20 +232,16 @@
 //!
 //! In some cases, [`Result<T, E>`] comes with size, alignment, and ABI guarantees.
 //! Specifically, one of either the `T` or `E` type must be a type that qualifies for the `Option`
-//! [representation guarantees][opt-rep] (let's call that type `I`), and the *other* type must meet
-//! all of the following conditions:
-//! * Is a zero-sized type with alignment 1 (a "1-ZST").
-//! * Is either a struct/tuple with no fields, or an enum with no variants.
-//! * Does not have the `#[non_exhaustive]` attribute.
+//! [representation guarantees][opt-rep] (let's call that type `I`), and the *other* type
+//! is a zero-sized type with alignment 1 (a "1-ZST").
 //!
 //! If that is the case, then `Result<T, E>` has the same size, alignment, and [function call ABI]
 //! as `I` (and therefore, as `Option<I>`). If `I` is `T`, it is therefore sound to transmute `t: I`
 //! to `Result<T, E>` (which will produce `Ok(t)`), and to transmute `Ok(t): Result<T, E>` to `I`
-//! (which will produce `t`). 
If `I` is `E`, the same applies with `Ok` replaced by `Err`.
 //!
-//! For example, `NonZeroI32` qualifies for the `Option` representation
-//! guarantees, and `()` is a zero-sized type with alignment 1, no fields, and
-//! it isn't `non_exhaustive`. This means that both `Result<NonZeroI32, ()>` and
+//! For example, `NonZeroI32` qualifies for the `Option` representation guarantees, and `()` is a
+//! zero-sized type with alignment 1. This means that both `Result<NonZeroI32, ()>` and
 //! `Result<(), NonZeroI32>` have the same size, alignment, and ABI
 //! as `NonZeroI32` (and `Option<NonZeroI32>`). The only difference is the implied semantics:
 //! * `Option<NonZeroI32>` is "a non-zero i32 might be present"
 //! * `Result<NonZeroI32, ()>` is "a non-zero i32 success result, if any"
 //! * `Result<(), NonZeroI32>` is "a non-zero i32 error result, if any"

From 224956f5bb3faa21d320e2a5f12a2a4b44f85a8b Mon Sep 17 00:00:00 2001
From: Travis Cross 
Date: Sat, 18 Oct 2025 00:23:15 +0000
Subject: [PATCH 111/358] Revise `Result`/`Option` guarantee docs

The notation used here (e.g. "transmute `t: T` to `Option<T>`") felt maybe
a bit heavy, in the context of the library documentation, so let's
elaborate this a bit.

Also, in the `Option` docs, we talk about this being true for "the
following types `T`", but it felt this caveat might get a bit lost in the
next sentence that talks about the valid transmutations, so let's
reiterate the caveat.

While we're touching the line, we can improve:

> The only difference is the implied semantics:

This sentence was a bit awkward due to the mismatched plurality and not
identifying the difference being spoken to.  We'll reword this to make it
more clear.

We'll wrap to 80, since the existing text and most of the doc comments in
these files are wrapped this way.
---
 core/src/option.rs | 10 ++++++----
 core/src/result.rs | 33 +++++++++++++++++++--------------
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/core/src/option.rs b/core/src/option.rs
index 47c554d75edb1..e3c4758bc6af5 100644
--- a/core/src/option.rs
+++ b/core/src/option.rs
@@ -118,10 +118,12 @@
 //!
 //! # Representation
 //!
-//! Rust guarantees to optimize the following types `T` such that
-//! [`Option<T>`] has the same size, alignment, and [function call ABI] as `T`.
-//! It is therefore sound to transmute `t: T` to `Option<T>` (which will produce `Some(t)`), and
-//! to transmute `Some(t): Option<T>` to `T` (which will produce `t`).
+//! Rust guarantees to optimize the following types `T` such that [`Option<T>`]
+//! has the same size, alignment, and [function call ABI] as `T`. It is
+//! therefore sound, when `T` is one of these types, to transmute a value `t` of
+//! type `T` to type `Option<T>` (producing the value `Some(t)`) and to
+//! transmute a value `Some(t)` of type `Option<T>` to type `T` (producing the
+//! value `t`).
 //!
 //! In some of these cases, Rust further guarantees the following:
 //! - `transmute::<_, Option<T>>([0u8; size_of::<T>()])` is sound and produces
diff --git a/core/src/result.rs b/core/src/result.rs
index 39689e657a580..6fee7febde38d 100644
--- a/core/src/result.rs
+++ b/core/src/result.rs
@@ -230,20 +230,25 @@
 //!
 //! # Representation
 //!
-//! In some cases, [`Result<T, E>`] comes with size, alignment, and ABI guarantees.
-//! Specifically, one of either the `T` or `E` type must be a type that qualifies for the `Option`
-//! [representation guarantees][opt-rep] (let's call that type `I`), and the *other* type
-//! is a zero-sized type with alignment 1 (a "1-ZST").
-//!
-//! If that is the case, then `Result<T, E>` has the same size, alignment, and [function call ABI]
-//! as `I` (and therefore, as `Option<I>`). If `I` is `T`, it is therefore sound to transmute `t: I`
-//! 
to `Result<T, E>` (which will produce `Ok(t)`), and to transmute `Ok(t): Result<T, E>` to `I`
-//! (which will produce `t`). If `I` is `E`, the same applies with `Ok` replaced by `Err`.
-//!
-//! For example, `NonZeroI32` qualifies for the `Option` representation guarantees, and `()` is a
-//! zero-sized type with alignment 1. This means that both `Result<NonZeroI32, ()>` and
-//! `Result<(), NonZeroI32>` have the same size, alignment, and ABI
-//! as `NonZeroI32` (and `Option<NonZeroI32>`). The only difference is the implied semantics:
+//! In some cases, [`Result<T, E>`] comes with size, alignment, and ABI
+//! guarantees. Specifically, one of either the `T` or `E` type must be a type
+//! that qualifies for the `Option` [representation guarantees][opt-rep] (let's
+//! call that type `I`), and the *other* type is a zero-sized type with
+//! alignment 1 (a "1-ZST").
+//!
+//! If that is the case, then `Result<T, E>` has the same size, alignment, and
+//! [function call ABI] as `I` (and therefore, as `Option<I>`). If `I` is `T`,
+//! it is therefore sound to transmute a value `t` of type `I` to type
+//! `Result<T, E>` (producing the value `Ok(t)`) and to transmute a value
+//! `Ok(t)` of type `Result<T, E>` to type `I` (producing the value `t`). If `I`
+//! is `E`, the same applies with `Ok` replaced by `Err`.
+//!
+//! For example, `NonZeroI32` qualifies for the `Option` representation
+//! guarantees and `()` is a zero-sized type with alignment 1. This means that
+//! both `Result<NonZeroI32, ()>` and `Result<(), NonZeroI32>` have the same
+//! size, alignment, and ABI as `NonZeroI32` (and `Option<NonZeroI32>`). The
+//! only difference between these is in the implied semantics:
+//!
 //! * `Option<NonZeroI32>` is "a non-zero i32 might be present"
 //! * `Result<NonZeroI32, ()>` is "a non-zero i32 success result, if any"
 //! * `Result<(), NonZeroI32>` is "a non-zero i32 error result, if any"

From 0d0448d69e361654338e38fce2a39aba02001886 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan 
Date: Sat, 18 Oct 2025 11:27:36 +0530
Subject: [PATCH 112/358] feat: separate `intrinsic-test` from the other CI
 pipelines

---
 stdarch/.github/workflows/main.yml  |  69 +++++++++++++++++
 stdarch/ci/intrinsic-test-docker.sh |  58 ++++++++++++++
 stdarch/ci/intrinsic-test.sh        | 114 ++++++++++++++++++++++++++++
 stdarch/ci/run.sh                   |  51 -------------
 4 files changed, 241 insertions(+), 51 deletions(-)
 create mode 100755 stdarch/ci/intrinsic-test-docker.sh
 create mode 100755 stdarch/ci/intrinsic-test.sh

diff --git a/stdarch/.github/workflows/main.yml b/stdarch/.github/workflows/main.yml
index b0d476f0e2ea5..f80855b08b994 100644
--- a/stdarch/.github/workflows/main.yml
+++ b/stdarch/.github/workflows/main.yml
@@ -248,6 +248,74 @@ jobs:
         if: matrix.target.os == 'ubuntu-latest' && !startsWith(matrix.target.tuple, 'thumb')
         env:
           TARGET: ${{ matrix.target.tuple }}
+
+  intrinsic-test:
+    needs: [style]
+    name: Intrinsic Test
+    runs-on: ${{ matrix.target.os }}
+    strategy:
+      matrix:
+        profile:
+          - dev
+          - release
+        target:
+          # Dockers that are run through docker on linux
+          - tuple: arm-unknown-linux-gnueabihf
+            os: ubuntu-latest
+          - tuple: armv7-unknown-linux-gnueabihf
+            os: ubuntu-latest
+          - tuple: aarch64-unknown-linux-gnu
+            os: ubuntu-latest
+          - tuple: aarch64_be-unknown-linux-gnu
+            os: ubuntu-latest
+
+        # Add additional variables to the matrix variations generated above using `include`:
+        include:
+          # `TEST_EVERYTHING` setups - there should be at least 1 for each architecture
+          - target:
+              tuple: aarch64-unknown-linux-gnu
+              os: ubuntu-latest
+            test_everything: true
+          - target:
+              tuple: aarch64_be-unknown-linux-gnu
+              os: ubuntu-latest
+            
test_everything: true
+            build_std: true
+          - target:
+              tuple: armv7-unknown-linux-gnueabihf
+              os: ubuntu-latest
+            test_everything: true
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: |
+          rustup update nightly --no-self-update
+          rustup default nightly
+        shell: bash
+      - run: rustup target add ${{ matrix.target.tuple }}
+        shell: bash
+        if: matrix.build_std == ''
+      - run: |
+          rustup component add rust-src
+          echo "CARGO_UNSTABLE_BUILD_STD=std" >> $GITHUB_ENV
+        shell: bash
+        if: matrix.build_std != ''
+
+      # Configure some env vars based on matrix configuration
+      - run: echo "PROFILE=--profile=${{matrix.profile}}" >> $GITHUB_ENV
+        shell: bash
+      - run: echo "STDARCH_TEST_EVERYTHING=1" >> $GITHUB_ENV
+        shell: bash
+        if: matrix.test_everything != ''
+      - run: echo "STDARCH_DISABLE_ASSERT_INSTR=1" >> $GITHUB_ENV
+        shell: bash
+        if: matrix.disable_assert_instr != ''
+      - run: ./ci/intrinsic-test-docker.sh ${{ matrix.target.tuple }}
+        shell: bash
+        if: matrix.target.os == 'ubuntu-latest' && !startsWith(matrix.target.tuple, 'thumb')
+        env:
+          TARGET: ${{ matrix.target.tuple }}
 
 # Check that the generated files agree with the checked-in versions.
   check-stdarch-gen:
@@ -276,6 +344,7 @@ jobs:
     - docs
     - verify
     - test
+    - intrinsic-test
     - check-stdarch-gen
     runs-on: ubuntu-latest
     # We need to ensure this job does *not* get skipped if its dependencies fail,
diff --git a/stdarch/ci/intrinsic-test-docker.sh b/stdarch/ci/intrinsic-test-docker.sh
new file mode 100755
index 0000000000000..f9c6edc34991d
--- /dev/null
+++ b/stdarch/ci/intrinsic-test-docker.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env sh
+
+# Small script to run tests for a target (or all targets) inside all the
+# respective docker images.
+
+set -ex
+
+if [ $# -lt 1 ]; then
+    >&2 echo "Usage: $0 <target>"
+    exit 1
+fi
+
+run() {
+    # Set the linker that is used for the host (e.g. when compiling a build.rs)
+    # This overrides any configuration in e.g. `.cargo/config.toml`, which will
+    # probably not work within the docker container.
+    HOST_LINKER="CARGO_TARGET_$(rustc --print host-tuple | tr '[:lower:]-' '[:upper:]_')_LINKER"
+
+    # Prevent `Read-only file system (os error 30)`.
+    cargo generate-lockfile
+
+    echo "Building docker container for TARGET=${1}"
+    docker build -t stdarch -f "ci/docker/${1}/Dockerfile" ci/
+    mkdir -p target c_programs rust_programs
+    echo "Running docker"
+    # shellcheck disable=SC2016
+    docker run \
+      --rm \
+      --user "$(id -u)":"$(id -g)" \
+      --env CARGO_HOME=/cargo \
+      --env CARGO_TARGET_DIR=/checkout/target \
+      --env TARGET="${1}" \
+      --env "${HOST_LINKER}"="cc" \
+      --env STDARCH_TEST_EVERYTHING \
+      --env STDARCH_DISABLE_ASSERT_INSTR \
+      --env NOSTD \
+      --env NORUN \
+      --env RUSTFLAGS \
+      --env CARGO_UNSTABLE_BUILD_STD \
+      --volume "${HOME}/.cargo":/cargo \
+      --volume "$(rustc --print sysroot)":/rust:ro \
+      --volume "$(pwd)":/checkout:ro \
+      --volume "$(pwd)"/target:/checkout/target \
+      --volume "$(pwd)"/c_programs:/checkout/c_programs \
+      --volume "$(pwd)"/rust_programs:/checkout/rust_programs \
+      --init \
+      --workdir /checkout \
+      --privileged \
+      stdarch \
+      sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh ${1}"
+}
+
+if [ -z "$1" ]; then
+    >&2 echo "No target specified!" 
+ exit 1 +else + run "${1}" +fi diff --git a/stdarch/ci/intrinsic-test.sh b/stdarch/ci/intrinsic-test.sh new file mode 100755 index 0000000000000..fe47f235bef16 --- /dev/null +++ b/stdarch/ci/intrinsic-test.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env sh + +set -ex + +: "${TARGET?The TARGET environment variable must be set.}" + +# Tests are all super fast anyway, and they fault often enough on travis that +# having only one thread increases debuggability to be worth it. +#export RUST_BACKTRACE=full +#export RUST_TEST_NOCAPTURE=1 +#export RUST_TEST_THREADS=1 + +export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir" +export HOST_RUSTFLAGS="${RUSTFLAGS}" +export PROFILE="${PROFILE:="--profile=release"}" + +case ${TARGET} in + # On 32-bit use a static relocation model which avoids some extra + # instructions when dealing with static data, notably allowing some + # instruction assertion checks to pass below the 20 instruction limit. If + # this is the default, dynamic, then too many instructions are generated + # when we assert the instruction for a function and it causes tests to fail. + i686-* | i586-*) + export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static" + ;; + # Some x86_64 targets enable by default more features beyond SSE2, + # which cause some instruction assertion checks to fail. + x86_64-*) + export RUSTFLAGS="${RUSTFLAGS} -C target-feature=-sse3" + ;; + #Unoptimized build uses fast-isel which breaks with msa + mips-* | mipsel-*) + export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false" + ;; + armv7-*eabihf | thumbv7-*eabihf) + export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon" + ;; + # Some of our test dependencies use the deprecated `gcc` crates which + # doesn't detect RISC-V compilers automatically, so do it manually here. + riscv*) + export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk,+zks,+zbb,+zbc" + ;; +esac + +echo "RUSTFLAGS=${RUSTFLAGS}" +echo "OBJDUMP=${OBJDUMP}" +echo "STDARCH_DISABLE_ASSERT_INSTR=${STDARCH_DISABLE_ASSERT_INSTR}" +echo "STDARCH_TEST_EVERYTHING=${STDARCH_TEST_EVERYTHING}" +echo "STDARCH_TEST_SKIP_FEATURE=${STDARCH_TEST_SKIP_FEATURE}" +echo "STDARCH_TEST_SKIP_FUNCTION=${STDARCH_TEST_SKIP_FUNCTION}" +echo "PROFILE=${PROFILE}" + +INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml" + +# Test targets compiled with extra features. 
+case ${TARGET} in + + x86_64* | i686*) + export STDARCH_DISABLE_ASSERT_INSTR=1 + ;; + + # Setup aarch64 & armv7 specific variables, the runner, along with some + # tests to skip + aarch64-unknown-linux-gnu*) + TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/" + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt + TEST_CXX_COMPILER="clang++" + TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" + ;; + + aarch64_be-unknown-linux-gnu*) + TEST_CPPFLAGS="-fuse-ld=lld" + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt + TEST_CXX_COMPILER="clang++" + TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}" + ;; + + armv7-unknown-linux-gnueabihf*) + TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/" + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt + TEST_CXX_COMPILER="clang++" + TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}" + ;; + *) + ;; + +esac + +# Arm specific +case "${TARGET}" in + aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*) + CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \ + cargo run "${INTRINSIC_TEST}" "${PROFILE}" \ + --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ + --runner "${TEST_RUNNER}" \ + --cppcompiler "${TEST_CXX_COMPILER}" \ + --skip "${TEST_SKIP_INTRINSICS}" \ + --target "${TARGET}" + ;; + + aarch64_be-unknown-linux-gnu*) + CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \ + cargo run "${INTRINSIC_TEST}" "${PROFILE}" \ + --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ + --runner "${TEST_RUNNER}" \ + --cppcompiler "${TEST_CXX_COMPILER}" \ + --skip "${TEST_SKIP_INTRINSICS}" \ + --target "${TARGET}" \ + --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \ + --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" + ;; + *) + ;; +esac diff --git a/stdarch/ci/run.sh b/stdarch/ci/run.sh index aa4479395d5b6..2bb77bae256f1 100755 --- a/stdarch/ci/run.sh +++ b/stdarch/ci/run.sh @@ -79,7 +79,6 @@ cargo_test() { CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml" STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml" -INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml" cargo_test "${CORE_ARCH} ${PROFILE}" @@ -130,61 +129,11 @@ case ${TARGET} in export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec" cargo_test "${PROFILE}" ;; - - # Setup aarch64 & armv7 specific variables, the runner, along with some - # tests to skip - aarch64-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" - ;; - - aarch64_be-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}" - ;; - - armv7-unknown-linux-gnueabihf*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}" - ;; *) ;; esac -# Arm specific -case "${TARGET}" in - 
aarch64-unknown-linux-gnueabihf*|armv7-unknown-linux-gnueabihf*)
-        CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
-            cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
-            --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
-            --runner "${TEST_RUNNER}" \
-            --cppcompiler "${TEST_CXX_COMPILER}" \
-            --skip "${TEST_SKIP_INTRINSICS}" \
-            --target "${TARGET}"
-        ;;
-
-    aarch64_be-unknown-linux-gnu*)
-        CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
-            cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
-            --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
-            --runner "${TEST_RUNNER}" \
-            --cppcompiler "${TEST_CXX_COMPILER}" \
-            --skip "${TEST_SKIP_INTRINSICS}" \
-            --target "${TARGET}" \
-            --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
-            --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}"
-        ;;
-    *)
-        ;;
-esac
-
 if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then
     # Test examples
     (
From 0623d8a60d0132c5ce60acf56300f4ef431dd576 Mon Sep 17 00:00:00 2001
From: Waffle Lapkin 
Date: Sat, 18 Oct 2025 14:07:00 +0200
Subject: [PATCH 113/358] hide `alloc::alloc::box_new` in docs

It's an internal function which isn't supposed to be used outside standard
library / `vec!`.
---
 alloc/src/boxed.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/alloc/src/boxed.rs b/alloc/src/boxed.rs
index 49ff768bed1b2..ae43fbfe1d69e 100644
--- a/alloc/src/boxed.rs
+++ b/alloc/src/boxed.rs
@@ -237,6 +237,7 @@ pub struct Box<
 /// the newly allocated memory. This is an intrinsic to avoid unnecessary copies.
 ///
 /// This is the surface syntax for `box <expr>` expressions.
+#[doc(hidden)]
 #[rustc_intrinsic]
 #[unstable(feature = "liballoc_internals", issue = "none")]
 pub fn box_new<T>(x: T) -> Box<T>;

From 6d1c607947f5bf74e21891f8b701e43d8f19f033 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Sun, 19 Oct 2025 05:50:26 +0100
Subject: [PATCH 114/358] std::thread::available_parallelism() vxworks libc
 symbol usage.

---
 std/src/sys/thread/unix.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/std/src/sys/thread/unix.rs b/std/src/sys/thread/unix.rs
index 2d2c4f9021288..2a3e3a9715f80 100644
--- a/std/src/sys/thread/unix.rs
+++ b/std/src/sys/thread/unix.rs
@@ -314,13 +314,10 @@ pub fn available_parallelism() -> io::Result<NonZero<usize>> {
         target_os = "vxworks" => {
             // Note: there is also `vxCpuConfiguredGet`, closer to _SC_NPROCESSORS_CONF
             // expectations than the actual cores availability. 
-            unsafe extern "C" {
-                fn vxCpuEnabledGet() -> libc::cpuset_t;
-            }
-
             // SAFETY: `vxCpuEnabledGet` always fetches a mask with at least one bit set
             unsafe{
-                let set = vxCpuEnabledGet();
+                let set = libc::vxCpuEnabledGet();
                 Ok(NonZero::new_unchecked(set.count_ones() as usize))
             }
         }

From a2a8b0b4d1f3a17eb96c3f541912833a0003cc89 Mon Sep 17 00:00:00 2001
From: ltdk 
Date: Sun, 19 Oct 2025 02:44:41 -0400
Subject: [PATCH 115/358] Move const_drop_in_place test from ptr to
 manually_drop module

---
 coretests/tests/manually_drop.rs | 38 ++++++++++++++++++++++++++++++
 coretests/tests/ptr.rs           | 40 +-------------------------------
 2 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/coretests/tests/manually_drop.rs b/coretests/tests/manually_drop.rs
index bbf444471ad2a..1638c82b161b4 100644
--- a/coretests/tests/manually_drop.rs
+++ b/coretests/tests/manually_drop.rs
@@ -27,3 +27,41 @@ fn smoke() {
     drop(x);
     drop(y);
 }
+
+#[test]
+fn const_drop_in_place() {
+    const COUNTER: usize = {
+        use core::cell::Cell;
+
+        let counter = Cell::new(0);
+
+        // only exists to make `Drop` indirect impl
+        #[allow(dead_code)]
+        struct Test<'a>(Dropped<'a>);
+
+        struct Dropped<'a>(&'a Cell<usize>);
+        impl const Drop for Dropped<'_> {
+            fn drop(&mut self) {
+                self.0.set(self.0.get() + 1);
+            }
+        }
+
+        let mut one = ManuallyDrop::new(Test(Dropped(&counter)));
+        let mut two = ManuallyDrop::new(Test(Dropped(&counter)));
+        let mut three = ManuallyDrop::new(Test(Dropped(&counter)));
+        assert!(counter.get() == 0);
+        unsafe {
+            ManuallyDrop::drop(&mut one);
+        }
+        assert!(counter.get() == 1);
+        unsafe {
+            ManuallyDrop::drop(&mut two);
+        }
+        assert!(counter.get() == 2);
+        unsafe {
+            ManuallyDrop::drop(&mut three);
+        }
+        counter.get()
+    };
+    assert_eq!(COUNTER, 3);
+}
diff --git a/coretests/tests/ptr.rs b/coretests/tests/ptr.rs
index 7afbb1260f239..4d5138d539b95 100644
--- a/coretests/tests/ptr.rs
+++ b/coretests/tests/ptr.rs
@@ -1,6 +1,6 @@
 use core::cell::RefCell;
 use core::marker::Freeze;
-use core::mem::{ManuallyDrop, MaybeUninit};
+use core::mem::MaybeUninit;
 use core::num::NonZero;
 use core::ptr;
 use core::ptr::*;
@@ -1045,41 +1045,3 @@ fn test_ptr_default() {
     let default = PtrMutDefaultTest::default();
     assert!(default.ptr.is_null());
 }
-
-#[test]
-fn test_const_drop_in_place() {
-    const COUNTER: usize = {
-        use core::cell::Cell;
-
-        let counter = Cell::new(0);
-
-        // only exists to make `Drop` indirect impl
-        #[allow(dead_code)]
-        struct Test<'a>(Dropped<'a>);
-
-        struct Dropped<'a>(&'a Cell<usize>);
-        impl const Drop for Dropped<'_> {
-            fn drop(&mut self) {
-                self.0.set(self.0.get() + 1);
-            }
-        }
-
-        let mut one = ManuallyDrop::new(Test(Dropped(&counter)));
-        let mut two = ManuallyDrop::new(Test(Dropped(&counter)));
-        let mut three = ManuallyDrop::new(Test(Dropped(&counter)));
-        assert!(counter.get() == 0);
-        unsafe {
-            ManuallyDrop::drop(&mut one);
-        }
-        assert!(counter.get() == 1);
-        unsafe {
-            ManuallyDrop::drop(&mut two);
-        }
-        assert!(counter.get() == 2);
-        unsafe {
-            ManuallyDrop::drop(&mut three);
-        }
-        counter.get()
-    };
-    assert_eq!(COUNTER, 3);
-}

From 1e556ab860c24c12110d86673837ce303857b425 Mon Sep 17 00:00:00 2001
From: joboet 
Date: Mon, 20 Oct 2025 17:43:07 +0200
Subject: [PATCH 116/358] handle spurious returns of `wait_timeout` in test

---
 std/tests/sync/condvar.rs | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/std/tests/sync/condvar.rs b/std/tests/sync/condvar.rs
index 2a525f9b5e948..42b880e283afe 100644
--- a/std/tests/sync/condvar.rs
+++ b/std/tests/sync/condvar.rs
@@ 
-285,17 +285,31 @@ nonpoison_and_poison_unwrap_test!( thread::scope(|s| { s.spawn(|| { + // Sleep so that the other thread has a chance to encounter the + // timeout. thread::sleep(Duration::from_secs(2)); maybe_unwrap(sent.set(true)); cond.notify_all(); }); - let guard = maybe_unwrap(sent.lock()); - // If there is internal overflow, this call will return almost - // immediately, before the other thread has reached the `notify_all` - let (guard, res) = maybe_unwrap(cond.wait_timeout(guard, Duration::from_secs(u64::MAX.div_ceil(1_000_000_000)))); - assert!(!res.timed_out()); - assert!(*guard); + let mut guard = maybe_unwrap(sent.lock()); + // Loop until `sent` is set by the thread to guard against spurious + // wakeups. If the `wait_timeout` happens just before the signal by + // the other thread, such a spurious wakeup might prevent the + // miscalculated timeout from occurring, but this is basically just + // a smoke test anyway. + loop { + if *guard { + break; + } + + // If there is internal overflow, this call will return almost + // immediately, before the other thread has reached the `notify_all`, + // and indicate a timeout. + let (g, res) = maybe_unwrap(cond.wait_timeout(guard, Duration::from_secs(u64::MAX.div_ceil(1_000_000_000)))); + assert!(!res.timed_out()); + guard = g; + } }) } ); From 75f5697de7a4b6941eaf0d6e4f5d60b5d62a384c Mon Sep 17 00:00:00 2001 From: Scott Schafer Date: Thu, 2 Oct 2025 15:58:27 -0600 Subject: [PATCH 117/358] chore: Update typos to 1.38.1 --- alloc/src/collections/vec_deque/tests.rs | 6 +++--- core/src/asserting.rs | 2 ++ core/src/panic/unwind_safe.rs | 4 ++-- std/src/sys/fs/vexos.rs | 2 +- std/src/sys/pal/uefi/time.rs | 2 +- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/alloc/src/collections/vec_deque/tests.rs b/alloc/src/collections/vec_deque/tests.rs index ad76cb14deb86..2501534e95080 100644 --- a/alloc/src/collections/vec_deque/tests.rs +++ b/alloc/src/collections/vec_deque/tests.rs @@ -367,7 +367,7 @@ fn test_rotate_right_panic() { #[test] fn test_binary_search() { - // If the givin VecDeque is not sorted, the returned result is unspecified and meaningless, + // If the given VecDeque is not sorted, the returned result is unspecified and meaningless, // as this method performs a binary search. let tester: VecDeque<_> = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55].into(); @@ -391,7 +391,7 @@ fn test_binary_search() { #[test] fn test_binary_search_by() { - // If the givin VecDeque is not sorted, the returned result is unspecified and meaningless, + // If the given VecDeque is not sorted, the returned result is unspecified and meaningless, // as this method performs a binary search. let tester: VecDeque<_> = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55].into(); @@ -406,7 +406,7 @@ fn test_binary_search_by() { #[test] fn test_binary_search_key() { - // If the givin VecDeque is not sorted, the returned result is unspecified and meaningless, + // If the given VecDeque is not sorted, the returned result is unspecified and meaningless, // as this method performs a binary search. let tester: VecDeque<_> = [ diff --git a/core/src/asserting.rs b/core/src/asserting.rs index 3015aa562e6c0..acaee72c93016 100644 --- a/core/src/asserting.rs +++ b/core/src/asserting.rs @@ -73,12 +73,14 @@ where // ***** Others ***** +//spellchecker:off /// All possible captured `assert!` elements /// /// # Types /// /// * `E`: **E**lement that is going to be displayed. /// * `M`: **M**arker used to differentiate [Capture]s in regards to [Debug]. 
+//spellchecker:on
 #[unstable(feature = "generic_assert_internals", issue = "44838")]
 pub struct Capture<E, M> {
     // If None, then `E` does not implements [Printable] or `E` wasn't evaluated (`assert!( ... )`
diff --git a/core/src/panic/unwind_safe.rs b/core/src/panic/unwind_safe.rs
index 722af55103839..21dbd09f49606 100644
--- a/core/src/panic/unwind_safe.rs
+++ b/core/src/panic/unwind_safe.rs
@@ -101,9 +101,9 @@ pub auto trait UnwindSafe {}
 #[rustc_diagnostic_item = "ref_unwind_safe_trait"]
 #[diagnostic::on_unimplemented(
     message = "the type `{Self}` may contain interior mutability and a reference may not be safely \
-               transferrable across a catch_unwind boundary",
+               transferable across a catch_unwind boundary",
     label = "`{Self}` may contain interior mutability and a reference may not be safely \
-             transferrable across a catch_unwind boundary"
+             transferable across a catch_unwind boundary"
 )]
 pub auto trait RefUnwindSafe {}
 
diff --git a/std/src/sys/fs/vexos.rs b/std/src/sys/fs/vexos.rs
index 99b156d535768..381c87c62c688 100644
--- a/std/src/sys/fs/vexos.rs
+++ b/std/src/sys/fs/vexos.rs
@@ -106,7 +106,7 @@ impl FilePermissions {
     }
 
     pub fn set_readonly(&mut self, _readonly: bool) {
-        panic!("Perimissions do not exist")
+        panic!("Permissions do not exist")
     }
 }
 
diff --git a/std/src/sys/pal/uefi/time.rs b/std/src/sys/pal/uefi/time.rs
index c6636626fd58a..861b98da18daf 100644
--- a/std/src/sys/pal/uefi/time.rs
+++ b/std/src/sys/pal/uefi/time.rs
@@ -181,7 +181,7 @@ pub(crate) mod system_time_internal {
     /// The changes are to use 1900-01-01-00:00:00 with timezone -1440 as anchor instead of UNIX
     /// epoch used in the original algorithm.
     pub(crate) const fn to_uefi(dur: &Duration, timezone: i16, daylight: u8) -> Option<Time> {

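The doc comment in the hunk above describes the anchor trick used by the UEFI time code: durations are measured not from the Unix epoch but from 1900-01-01 00:00:00 at the lowest UEFI timezone (-1440 minutes), so that every representable UEFI `Time` maps to a non-negative `Duration`. A minimal sketch of that anchor arithmetic, with illustrative constants and a hypothetical `unix_to_anchor` helper (this is not the actual std implementation):

```rust
use core::time::Duration;

// Days from 1900-01-01 to 1970-01-01: 70 years, 17 of them leap years.
const DAYS_1900_TO_1970: u64 = 70 * 365 + 17; // 25_567 days
// Seconds between the two epochs: 2_208_988_800.
const SECS_1900_TO_1970: u64 = DAYS_1900_TO_1970 * 86_400;
// One extra day of headroom so the lowest timezone (-1440 minutes) stays non-negative.
const TIMEZONE_HEADROOM: u64 = 1_440 * 60;

/// Illustrative only: re-express a duration since the Unix epoch as a
/// duration since the 1900-01-01 / timezone -1440 anchor described above.
fn unix_to_anchor(since_unix_epoch: Duration) -> Duration {
    since_unix_epoch + Duration::from_secs(SECS_1900_TO_1970 + TIMEZONE_HEADROOM)
}
```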
immintrin.h
+ Arithmetic + + + + + + + + Add unsigned 64-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry or overflow flag), and store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[64:0] := a[63:0] + b[63:0] + (c_in > 0 ? 1 : 0) +MEM[out+63:out] := tmp[63:0] +dst[0] := tmp[64] +dst[7:1] := 0 + + + + ADX +
immintrin.h
+ Arithmetic +
+ + + + + Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst"." + a[127:0] := ShiftRows(a[127:0]) +a[127:0] := SubBytes(a[127:0]) +a[127:0] := MixColumns(a[127:0]) +dst[127:0] := a[127:0] XOR RoundKey[127:0] + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + + Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst"." + a[127:0] := ShiftRows(a[127:0]) +a[127:0] := SubBytes(a[127:0]) +dst[127:0] := a[127:0] XOR RoundKey[127:0] + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + + Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst". + a[127:0] := InvShiftRows(a[127:0]) +a[127:0] := InvSubBytes(a[127:0]) +a[127:0] := InvMixColumns(a[127:0]) +dst[127:0] := a[127:0] XOR RoundKey[127:0] + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + + Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst". + a[127:0] := InvShiftRows(a[127:0]) +a[127:0] := InvSubBytes(a[127:0]) +dst[127:0] := a[127:0] XOR RoundKey[127:0] + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + Perform the InvMixColumns transformation on "a" and store the result in "dst". + dst[127:0] := InvMixColumns(a[127:0]) + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + + Assist in expanding the AES cipher key by computing steps towards generating a round key for encryption cipher using data from "a" and an 8-bit round constant specified in "imm8", and store the result in "dst"." + X3[31:0] := a[127:96] +X2[31:0] := a[95:64] +X1[31:0] := a[63:32] +X0[31:0] := a[31:0] +RCON[31:0] := ZeroExtend32(imm8[7:0]) +dst[31:0] := SubWord(X1) +dst[63:32] := RotWord(SubWord(X1)) XOR RCON +dst[95:64] := SubWord(X3) +dst[127:96] := RotWord(SubWord(X3)) XOR RCON + + + AES +
wmmintrin.h
+ Cryptography +
+ + + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in tiles "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(a.row[m].bf16[2*k+0]) * FP32(b.row[k].bf16[2*n+0]) + tmp.fp32[n] += FP32(a.row[m].bf16[2*k+1]) * FP32(b.row[k].bf16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-BF16 +
immintrin.h
+ Application-Targeted +
+ + + Compute dot-product of BF16 (16-bit) floating-point pairs in tiles "src0" and "src1", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(src0.row[m].bf16[2*k+0]) * FP32(src1.row[k].bf16[2*n+0]) + tmp.fp32[n] += FP32(src0.row[m].bf16[2*k+1]) * FP32(src1.row[k].bf16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-BF16 +
immintrin.h
+ Application-Targeted +
+ + + + + + + Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles "a" and "b" is interpreted as a complex number with FP16 real part and FP16 imaginary part. Calculates the imaginary part of the result. For each possible combination of (row of "a", column of "b"), it performs a set of multiplication and accumulations on all corresponding complex numbers (one from "a" and one from "b"). The imaginary part of the "a" element is multiplied with the real part of the corresponding "b" element, and the real part of the "a" element is multiplied with the imaginary part of the corresponding "b" elements. The two accumulated results are added, and then accumulated into the corresponding row and column of "dst". + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) + tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-COMPLEX +
immintrin.h
+ Application-Targeted +
+ + + + + + Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles "a" and "b" is interpreted as a complex number with FP16 real part and FP16 imaginary part. Calculates the real part of the result. For each possible combination of (row of "a", column of "b"), it performs a set of multiplication and accumulations on all corresponding complex numbers (one from "a" and one from "b"). The real part of the "a" element is multiplied with the real part of the corresponding "b" element, and the negated imaginary part of the "a" element is multiplied with the imaginary part of the corresponding "b" elements. The two accumulated results are added, and then accumulated into the corresponding row and column of "dst". + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) + tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-COMPLEX +
immintrin.h
+ Application-Targeted +
+ + + Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles "src0" and "src1" is interpreted as a complex number with FP16 real part and FP16 imaginary part. This function calculates the imaginary part of the result. + + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+0]) * FP32(src1.row[k].fp16[2*n+1]) + tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+1]) * FP32(src1.row[k].fp16[2*n+0]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-COMPLEX +
immintrin.h
+ Application-Targeted +
+ + + Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles src0 and src1 is interpreted as a complex number with FP16 real part and FP16 imaginary part. This function calculates the real part of the result. + + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+0]) * FP32(src1.row[k].fp16[2*n+0]) + tmp.fp32[n] += FP32(-src0.row[m].fp16[2*k+1]) * FP32(src1.row[k].fp16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-COMPLEX +
immintrin.h
+ Application-Targeted +
+ + + + + + + Compute dot-product of FP16 (16-bit) floating-point pairs in tiles "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) + tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-FP16 +
immintrin.h
+ Application-Targeted +
+ + + Compute dot-product of FP16 (16-bit) floating-point pairs in tiles "src0" and "src1", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. + + FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (src0.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+0]) * FP32(src1.row[k].fp16[2*n+0]) + tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+1]) * FP32(src1.row[k].fp16[2*n+1]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + + + AMX-FP16 +
immintrin.h
+ Application-Targeted +
+ + + + + + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "a" with corresponding unsigned 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". + DEFINE DPBD(c, x, y) { + tmp1 := SignExtend32(x.byte[0]) * ZeroExtend32(y.byte[0]) + tmp2 := SignExtend32(x.byte[1]) * ZeroExtend32(y.byte[1]) + tmp3 := SignExtend32(x.byte[2]) * ZeroExtend32(y.byte[2]) + tmp4 := SignExtend32(x.byte[3]) * ZeroExtend32(y.byte[3]) + + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + + + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". + DEFINE DPBD(c, x, y) { + tmp1 := ZeroExtend32(x.byte[0]) * SignExtend32(y.byte[0]) + tmp2 := ZeroExtend32(x.byte[1]) * SignExtend32(y.byte[1]) + tmp3 := ZeroExtend32(x.byte[2]) * SignExtend32(y.byte[2]) + tmp4 := ZeroExtend32(x.byte[3]) * SignExtend32(y.byte[3]) + + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + + + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding unsigned 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". + DEFINE DPBD(c, x, y) { + tmp1 := ZeroExtend32(x.byte[0]) * ZeroExtend32(y.byte[0]) + tmp2 := ZeroExtend32(x.byte[1]) * ZeroExtend32(y.byte[1]) + tmp3 := ZeroExtend32(x.byte[2]) * ZeroExtend32(y.byte[2]) + tmp4 := ZeroExtend32(x.byte[3]) * ZeroExtend32(y.byte[3]) + + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
+ + + + + + Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". + DEFINE DPBD(c, x, y) { + tmp1 := SignExtend32(x.byte[0]) * SignExtend32(y.byte[0]) + tmp2 := SignExtend32(x.byte[1]) * SignExtend32(y.byte[1]) + tmp3 := SignExtend32(x.byte[2]) * SignExtend32(y.byte[2]) + tmp4 := SignExtend32(x.byte[3]) * SignExtend32(y.byte[3]) + + RETURN c + tmp1 + tmp2 + tmp3 + tmp4 +} +FOR m := 0 TO dst.rows - 1 + tmp := dst.row[m] + FOR k := 0 TO (a.colsb / 4) - 1 + FOR n := 0 TO (dst.colsb / 4) - 1 + tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) + ENDFOR + ENDFOR + write_row_and_zero(dst, m, tmp, dst.colsb) +ENDFOR +zero_upper_rows(dst, dst.rows) +zero_tileconfig_start() + + + AMX-INT8 +
immintrin.h
+ Application-Targeted +
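For reference, a short sketch of issuing the four raw dot-product flavours above, assuming they correspond to the immintrin.h intrinsics _tile_dpbsud, _tile_dpbusd, _tile_dpbuud, and _tile_dpbssd (tile numbers are compile-time constants, and the tiles must have been configured and loaded beforehand):

#include <immintrin.h>

/* Sketch: tile 0 accumulates int32 sums; tiles 1 and 2 hold int8 data. */
void dp_flavours(void)
{
    _tile_dpbsud(0, 1, 2); /* signed   x unsigned */
    _tile_dpbusd(0, 1, 2); /* unsigned x signed   */
    _tile_dpbuud(0, 1, 2); /* unsigned x unsigned */
    _tile_dpbssd(0, 1, 2); /* signed   x signed   */
}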
+ 
+ 
+ Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "src0" with corresponding signed 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of the tile is specified in the __tile1024i struct. The tile register is allocated by the compiler. 
+ 
+ DEFINE DPBD(c, x, y) {
	tmp1 := SignExtend32(x.byte[0]) * SignExtend32(y.byte[0])
	tmp2 := SignExtend32(x.byte[1]) * SignExtend32(y.byte[1])
	tmp3 := SignExtend32(x.byte[2]) * SignExtend32(y.byte[2])
	tmp4 := SignExtend32(x.byte[3]) * SignExtend32(y.byte[3])
	RETURN c + tmp1 + tmp2 + tmp3 + tmp4
}
FOR m := 0 TO dst.rows - 1
	tmp := dst.row[m]
	FOR k := 0 TO (src0.colsb / 4) - 1
		FOR n := 0 TO (dst.colsb / 4) - 1
			tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n])
		ENDFOR
	ENDFOR
	write_row_and_zero(dst, m, tmp, dst.colsb)
ENDFOR
zero_upper_rows(dst, dst.rows)
zero_tileconfig_start()
	
+ 
+ 
+ 
+ AMX-INT8
immintrin.h
+ Application-Targeted +
+ 
+ 
+ Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "src0" with corresponding unsigned 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of the tile is specified in the __tile1024i struct. The tile register is allocated by the compiler. 
+ 
+ DEFINE DPBD(c, x, y) {
	tmp1 := SignExtend32(x.byte[0]) * ZeroExtend32(y.byte[0])
	tmp2 := SignExtend32(x.byte[1]) * ZeroExtend32(y.byte[1])
	tmp3 := SignExtend32(x.byte[2]) * ZeroExtend32(y.byte[2])
	tmp4 := SignExtend32(x.byte[3]) * ZeroExtend32(y.byte[3])
	RETURN c + tmp1 + tmp2 + tmp3 + tmp4
}
FOR m := 0 TO dst.rows - 1
	tmp := dst.row[m]
	FOR k := 0 TO (src0.colsb / 4) - 1
		FOR n := 0 TO (dst.colsb / 4) - 1
			tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n])
		ENDFOR
	ENDFOR
	write_row_and_zero(dst, m, tmp, dst.colsb)
ENDFOR
zero_upper_rows(dst, dst.rows)
zero_tileconfig_start()
	
+ 
+ 
+ 
+ AMX-INT8
immintrin.h
+ Application-Targeted +
+ 
+ 
+ Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "src0" with corresponding signed 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of the tile is specified in the __tile1024i struct. The tile register is allocated by the compiler. 
+ 
+ DEFINE DPBD(c, x, y) {
	tmp1 := ZeroExtend32(x.byte[0]) * SignExtend32(y.byte[0])
	tmp2 := ZeroExtend32(x.byte[1]) * SignExtend32(y.byte[1])
	tmp3 := ZeroExtend32(x.byte[2]) * SignExtend32(y.byte[2])
	tmp4 := ZeroExtend32(x.byte[3]) * SignExtend32(y.byte[3])
	RETURN c + tmp1 + tmp2 + tmp3 + tmp4
}
FOR m := 0 TO dst.rows - 1
	tmp := dst.row[m]
	FOR k := 0 TO (src0.colsb / 4) - 1
		FOR n := 0 TO (dst.colsb / 4) - 1
			tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n])
		ENDFOR
	ENDFOR
	write_row_and_zero(dst, m, tmp, dst.colsb)
ENDFOR
zero_upper_rows(dst, dst.rows)
zero_tileconfig_start()
	
+ 
+ 
+ 
+ AMX-INT8
immintrin.h
+ Application-Targeted +
+ 
+ 
+ Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "src0" with corresponding unsigned 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of the tile is specified in the __tile1024i struct. The tile register is allocated by the compiler. 
+ 
+ DEFINE DPBD(c, x, y) {
	tmp1 := ZeroExtend32(x.byte[0]) * ZeroExtend32(y.byte[0])
	tmp2 := ZeroExtend32(x.byte[1]) * ZeroExtend32(y.byte[1])
	tmp3 := ZeroExtend32(x.byte[2]) * ZeroExtend32(y.byte[2])
	tmp4 := ZeroExtend32(x.byte[3]) * ZeroExtend32(y.byte[3])
	RETURN c + tmp1 + tmp2 + tmp3 + tmp4
}
FOR m := 0 TO dst.rows - 1
	tmp := dst.row[m]
	FOR k := 0 TO (src0.colsb / 4) - 1
		FOR n := 0 TO (dst.colsb / 4) - 1
			tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n])
		ENDFOR
	ENDFOR
	write_row_and_zero(dst, m, tmp, dst.colsb)
ENDFOR
zero_upper_rows(dst, dst.rows)
zero_tileconfig_start()
	
+ 
+ 
+ 
+ AMX-INT8
immintrin.h
+ Application-Targeted +
+ 
+ 
+ 
+ 
+ Load tile configuration from a 64-byte memory location specified by "mem_addr". The tile configuration format is specified below, and includes the tile type palette, the number of bytes per row, and the number of rows. If the specified palette_id is zero, that signifies the init state for both the tile config and the tile data, and the tiles are zeroed. Any invalid configurations will result in a #GP fault. 
+ 
// format of memory payload. each field is a byte.
// 0: palette
// 1: start_row
// 2-15: reserved, must be zero
// 16-17: tile0.colsb
// 18-19: tile1.colsb
// 20-21: tile2.colsb
// ...
// 30-31: tile7.colsb
// 32-47: reserved, must be zero
// 48: tile0.rows
// 49: tile1.rows
// 50: tile2.rows
// ...
// 55: tile7.rows
// 56-63: reserved, must be zero
	
	
	AMX-TILE
immintrin.h
+ Application-Targeted +
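As a usage sketch (field offsets taken from the payload layout above; the helper name and tile shape are illustrative only), a 64-byte buffer can be prepared in C and handed to _tile_loadconfig:

#include <immintrin.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical setup: palette 1, tile 0 configured as 16 rows x 64 bytes.
   All reserved bytes must stay zero, per the layout above. */
static void setup_tile0(void)
{
    uint8_t cfg[64];
    memset(cfg, 0, sizeof(cfg));
    cfg[0] = 1;            /* byte 0: palette_id            */
    cfg[1] = 0;            /* byte 1: start_row             */
    cfg[16] = 64;          /* bytes 16-17: tile0.colsb = 64 */
    cfg[17] = 0;
    cfg[48] = 16;          /* byte 48: tile0.rows = 16      */
    _tile_loadconfig(cfg);
}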
+ 
+ 
+ 
+ Store the current tile configuration to a 64-byte memory location specified by "mem_addr". The tile configuration format is specified below, and includes the tile type palette, the number of bytes per row, and the number of rows. If tiles are not configured, all zeroes will be stored to memory. 
+ 
// format of memory payload. each field is a byte.
// 0: palette
// 1: start_row
// 2-15: reserved, must be zero
// 16-17: tile0.colsb
// 18-19: tile1.colsb
// 20-21: tile2.colsb
// ...
// 30-31: tile7.colsb
// 32-47: reserved, must be zero
// 48: tile0.rows
// 49: tile1.rows
// 50: tile2.rows
// ...
// 55: tile7.rows
// 56-63: reserved, must be zero
	
	
	AMX-TILE
immintrin.h
+ Application-Targeted +
+ 
+ 
+ 
+ 
+ 
+ Load tile rows from memory specified by "base" address and "stride" into destination tile "dst" using the tile configuration previously configured via "_tile_loadconfig". 
+ start := tileconfig.startRow
IF start == 0 // not restarting, zero incoming state
	tilezero(dst)
FI
nbytes := dst.colsb
DO WHILE start < dst.rows
	memptr := base + start * stride
	write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes)
	start := start + 1
OD
zero_upper_rows(dst, dst.rows)
zero_tileconfig_start()
	
	
	AMX-TILE
immintrin.h
+ Application-Targeted +
+ 
+ 
+ 
+ 
+ 
+ Load tile rows from memory specified by "base" address and "stride" into destination tile "dst" using the tile configuration previously configured via "_tile_loadconfig". This intrinsic provides a hint to the implementation that the data will likely not be reused in the near future and the data caching can be optimized accordingly. 
+ start := tileconfig.startRow
IF start == 0 // not restarting, zero incoming state
	tilezero(dst)
FI
nbytes := dst.colsb
DO WHILE start < dst.rows
	memptr := base + start * stride
	write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes)
	start := start + 1
OD
zero_upper_rows(dst, dst.rows)
zero_tileconfig_start()
	
	
	AMX-TILE
immintrin.h
+ Application-Targeted +
+ + + Release the tile configuration to return to the init state, which releases all storage it currently holds. + + AMX-TILE +
immintrin.h
+ Application-Targeted +
+ 
+ 
+ 
+ 
+ 
+ Store the tile specified by "src" to memory specified by "base" address and "stride" using the tile configuration previously configured via "_tile_loadconfig". 
+ start := tileconfig.startRow
DO WHILE start < src.rows
	memptr := base + start * stride
	write_memory(memptr, src.colsb, src.row[start])
	start := start + 1
OD
zero_tileconfig_start()
	
	
	AMX-TILE
immintrin.h
+ Application-Targeted +
+ + + + Zero the tile specified by "tdest". + nbytes := palette_table[tileconfig.palette_id].bytes_per_row +FOR i := 0 TO palette_table[tileconfig.palette_id].max_rows-1 + FOR j := 0 TO nbytes-1 + tdest.row[i].byte[j] := 0 + ENDFOR +ENDFOR + + + AMX-TILE +
immintrin.h
+ Application-Targeted +
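Putting the raw AMX-TILE operations above together, a minimal sketch of one tile-sized int8 multiply-accumulate step, assuming a configuration buffer cfg prepared as in the _tile_loadconfig example and source/destination buffers sized to match it:

#include <immintrin.h>
#include <stddef.h>

/* Sketch: C = A * B for one tile; tile 0 accumulates, tiles 1-2 are sources. */
void tile_step(const void *cfg, const void *A, const void *B,
               void *C, size_t stride)
{
    _tile_loadconfig(cfg);      /* enter a valid tile configuration  */
    _tile_zero(0);              /* clear the int32 accumulator tile  */
    _tile_loadd(1, A, stride);  /* load the int8 source tiles        */
    _tile_loadd(2, B, stride);
    _tile_dpbssd(0, 1, 2);      /* signed x signed dot-product       */
    _tile_stored(0, C, stride); /* write accumulators back to memory */
    _tile_release();            /* return to the init state          */
}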
+ 
+ 
+ Load tile rows from memory specified by "base" address and "stride" into destination tile "dst". The shape of the tile is specified in the __tile1024i struct. The tile register is allocated by the compiler. 
+ 
start := tileconfig.startRow
IF start == 0 // not restarting, zero incoming state
	tilezero(dst)
FI
nbytes := dst.colsb
DO WHILE start < dst.rows
	memptr := base + start * stride
	write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes)
	start := start + 1
OD
zero_upper_rows(dst, dst.rows)
zero_tileconfig_start()
	
+ 
+ 
+ 
+ AMX-TILE
immintrin.h
+ Application-Targeted +
+ 
+ 
+ Store the tile specified by "src" to memory specified by "base" address and "stride". The shape of the tile is specified in the __tile1024i struct. The tile register is allocated by the compiler. 
+ 
start := tileconfig.startRow
DO WHILE start < src.rows
	memptr := base + start * stride
	write_memory(memptr, src.colsb, src.row[start])
	start := start + 1
OD
zero_tileconfig_start()
	
+ 
+ 
+ 
+ AMX-TILE
immintrin.h
+ Application-Targeted +
+ 
+ 
+ Load tile rows from memory specified by "base" address and "stride" into destination tile "dst". This intrinsic provides a hint to the implementation that the data will likely not be reused in the near future and the data caching can be optimized accordingly. The shape of the tile is specified in the __tile1024i struct. The tile register is allocated by the compiler. 
+ 
start := tileconfig.startRow
IF start == 0 // not restarting, zero incoming state
	tilezero(dst)
FI
nbytes := dst.colsb
DO WHILE start < dst.rows
	memptr := base + start * stride
	write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes)
	start := start + 1
OD
zero_upper_rows(dst, dst.rows)
zero_tileconfig_start()
	
+ 
+ 
+ 
+ AMX-TILE
immintrin.h
+ Application-Targeted +
+ 
+ 
+ Zero the tile specified by "dst". The shape of the tile is specified in the __tile1024i struct. The tile register is allocated by the compiler. 
+ 
nbytes := palette_table[tileconfig.palette_id].bytes_per_row
FOR i := 0 TO palette_table[tileconfig.palette_id].max_rows-1
	FOR j := 0 TO nbytes-1
		dst.row[i].byte[j] := 0
	ENDFOR
ENDFOR
	
	
	AMX-TILE
immintrin.h
+ Application-Targeted +
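Clang exposes these __tile1024i entries as wrapper functions in its amxintrin.h; a sketch under that assumption (the 16x64 shape is illustrative):

#include <immintrin.h>
#include <stddef.h>

/* Sketch (assumes Clang's __tile1024i wrappers): c = a * b for one tile. */
void tile1024i_step(const void *A, const void *B, void *C, size_t stride)
{
    __tile1024i a = {16, 64};   /* rows, colsb */
    __tile1024i b = {16, 64};
    __tile1024i c = {16, 64};
    __tile_loadd(&a, A, stride);
    __tile_loadd(&b, B, stride);
    __tile_zero(&c);
    __tile_dpbssd(&c, a, b);    /* signed x signed int8 dot-product */
    __tile_stored(C, stride, c);
}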
+ 
+ 
+ 
+ Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+ 
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := ACOS(a[i+63:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+ 
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := ACOS(a[i+31:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". 
+ 
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := ACOSH(a[i+63:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". 
+ 
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := ACOSH(a[i+31:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+ 
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := ASIN(a[i+63:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+ 
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := ASIN(a[i+31:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". 
+ 
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := ASINH(a[i+63:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". 
+ 
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := ASINH(a[i+31:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+ 
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := ATAN(a[i+63:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+ 
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := ATAN(a[i+31:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". 
+ 
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := ATANH(a[i+63:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". 
+ 
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := ATANH(a[i+31:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := COSD(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := COSD(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := COSH(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := COSH(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ 
+ Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". 
+ 
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0))
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ 
+ 
+ 
+ 
+ Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". 
+ 
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0))
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) + MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
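These trigonometric vectors are SVML intrinsics, supplied by compilers that bundle Intel's short-vector math library (ICC/ICX, MSVC) rather than by a dedicated instruction. Assuming the Intrinsics Guide name _mm256_sincos_ps for the entry above, a usage sketch:

#include <immintrin.h>

/* Sketch: evaluate sine and cosine of eight angles at once. */
void sincos8(const float *angles, float *s, float *c)
{
    __m256 x = _mm256_loadu_ps(angles);
    __m256 cos_out;
    __m256 sin_out = _mm256_sincos_ps(&cos_out, x); /* sin returned, cos via pointer */
    _mm256_storeu_ps(s, sin_out);
    _mm256_storeu_ps(c, cos_out);
}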
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SIND(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SIND(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SINH(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SINH(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := TAN(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := TAN(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := TAND(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := TAND(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := TANH(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := TANH(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Trigonometry +
+ + + + Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CubeRoot(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := CubeRoot(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CEXP(a[31:0], b[31:0]) { + result[31:0] := POW(FP32(e), a[31:0]) * COS(b[31:0]) + result[63:32] := POW(FP32(e), a[31:0]) * SIN(b[31:0]) + RETURN result +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CEXP(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CLOG(a[31:0], b[31:0]) { + result[31:0] := LOG(SQRT(POW(a, 2.0) + POW(b, 2.0))) + result[63:32] := ATAN2(b, a) + RETURN result +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CLOG(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ 
+ 
+ 
+ Compute the square root of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". 
+ 
DEFINE CSQRT(a[31:0], b[31:0]) {
	sign[31:0] := (b < 0.0) ? -FP32(1.0) : FP32(1.0)
	result[31:0] := SQRT((a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0)
	result[63:32] := sign * SQRT((-a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0)
	RETURN result
}
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := CSQRT(a[i+31:i], a[i+63:i+32])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POW(10.0, a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POW(FP32(10.0), a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POW(2.0, a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POW(FP32(2.0), a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) - 1.0 +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0 +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := InvCubeRoot(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := InvCubeRoot(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := InvSQRT(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := InvSQRT(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LOG(1.0 + a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LOG(1.0 + a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
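Assuming the SVML names _mm256_exp_ps and _mm256_log1p_ps for the exponential and log(1 + x) entries above, the two chain naturally; a sketch:

#include <immintrin.h>

/* Sketch: softplus(x) = log(1 + e^x) for eight floats at once. */
__m256 softplus_ps(__m256 x)
{
    return _mm256_log1p_ps(_mm256_exp_ps(x));
}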
+ + + + Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Elementary Math Functions +
+ 
+ 
+ 
+ 
+ Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised to the power of packed elements in "b", and store the results in "dst". 
+ 
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := POW(a[i+63:i], b[i+63:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Elementary Math Functions +
+ 
+ 
+ 
+ 
+ Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised to the power of packed elements in "b", and store the results in "dst". 
+ 
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := POW(a[i+31:i], b[i+31:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Elementary Math Functions +
+ 
+ 
+ 
+ Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm256_sqrt_pd". 
+ 
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := SQRT(a[i+63:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Elementary Math Functions +
+ 
+ 
+ 
+ Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm256_sqrt_ps". 
+ 
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := SQRT(a[i+31:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := CDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := InverseCDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := InverseCDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ERF(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ERF(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
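The normal CDF and error function entries above are related by CDFNormal(x) = 0.5 * (1 + ERF(x / sqrt(2))); a sketch of that identity, assuming the SVML names _mm256_erf_ps and _mm256_cdfnorm_ps:

#include <immintrin.h>

/* Sketch: rebuild CDFNormal from ERF; should agree with _mm256_cdfnorm_ps
   up to rounding. */
__m256 cdfnorm_via_erf(__m256 x)
{
    const __m256 inv_sqrt2 = _mm256_set1_ps(0.70710678f);
    __m256 e = _mm256_erf_ps(_mm256_mul_ps(x, inv_sqrt2));
    return _mm256_mul_ps(_mm256_set1_ps(0.5f),
                         _mm256_add_ps(_mm256_set1_ps(1.0f), e));
}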
+ + + + Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := 1.0 - ERF(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ 
+ 
+ 
+ Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". 
+ FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := 1.0 - ERF(a[i+31:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i])) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ 
+ 
+ 
+ Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". 
+ FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i]))
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := 1.0 / ERF(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Probability/Statistics +
+ 
+ 
+ 
+ Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". 
+ FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := 1.0 / ERF(a[i+31:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Probability/Statistics +
+ + + + + Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 31 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 3 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 31 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 3 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed 32-bit integers into memory at "mem_addr". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
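Assuming the combined form above is the SVML _mm256_idivrem_epi32 (quotients returned, remainders written through the pointer), a sketch:

#include <immintrin.h>

/* Sketch: split eight dividends into quotients and remainders in one call. */
void divmod8(__m256i a, __m256i b, __m256i *quo, __m256i *rem)
{
    *quo = _mm256_idivrem_epi32(rem, a, b); /* *rem gets a % b, return is a / b */
}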
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ 
+ 
+ 
+ 
+ Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 8-bit integers in "dst". 
+ FOR j := 0 to 31
	i := 8*j
	dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Arithmetic +
+ 
+ 
+ 
+ 
+ Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 16-bit integers in "dst". 
+ FOR j := 0 to 15
	i := 16*j
	dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ 
+ 
+ 
+ 
+ Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 64-bit integers in "dst". 
+ FOR j := 0 to 3
	i := 64*j
	dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Arithmetic +
+ 
+ 
+ 
+ 
+ Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 8-bit integers in "dst". 
+ FOR j := 0 to 31
	i := 8*j
	dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Arithmetic +
+ 
+ 
+ 
+ 
+ Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 16-bit integers in "dst". 
+ FOR j := 0 to 15
	i := 16*j
	dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ 
+ 
+ 
+ 
+ Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 64-bit integers in "dst". 
+ FOR j := 0 to 3
	i := 64*j
	dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i])
ENDFOR
dst[MAX:256] := 0
	
	AVX
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed unsigned 32-bit integers into memory at "mem_addr". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Arithmetic +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CEIL(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := CEIL(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := FLOOR(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := FLOOR(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ROUND(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ROUND(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := TRUNCATE(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Miscellaneous +
+ + + + Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := TRUNCATE(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Miscellaneous +
+ + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ 
+ 
+ 
+ 
+ Alternately add and subtract packed double-precision (64-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". 
+ 
FOR j := 0 to 3
	i := j*64
	IF ((j & 1) == 0)
		dst[i+63:i] := a[i+63:i] - b[i+63:i]
	ELSE
		dst[i+63:i] := a[i+63:i] + b[i+63:i]
	FI
ENDFOR
dst[MAX:256] := 0
	
+ 
+ AVX
immintrin.h
+ Arithmetic +
+ 
+ 
+ 
+ 
+ Alternately add and subtract packed single-precision (32-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". 
+ 
FOR j := 0 to 7
	i := j*32
	IF ((j & 1) == 0)
		dst[i+31:i] := a[i+31:i] - b[i+31:i]
	ELSE
		dst[i+31:i] := a[i+31:i] + b[i+31:i]
	FI
ENDFOR
dst[MAX:256] := 0
	
+ 
+ AVX
immintrin.h
+ Arithmetic +
+ + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + dst[i+63:i] := a[i+63:i] / b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := a[i+31:i] / b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + + Conditionally multiply the packed single-precision (32-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8". + +DEFINE DP(a[127:0], b[127:0], imm8[7:0]) { + FOR j := 0 to 3 + i := j*32 + IF imm8[(4+j)%8] + temp[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + temp[i+31:i] := FP32(0.0) + FI + ENDFOR + + sum[31:0] := (temp[127:96] + temp[95:64]) + (temp[63:32] + temp[31:0]) + + FOR j := 0 to 3 + i := j*32 + IF imm8[j%8] + tmpdst[i+31:i] := sum[31:0] + ELSE + tmpdst[i+31:i] := FP32(0.0) + FI + ENDFOR + RETURN tmpdst[127:0] +} +dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0]) +dst[255:128] := DP(a[255:128], b[255:128], imm8[7:0]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
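The imm8 mask makes this dot product (the AVX _mm256_dp_ps described above) operate per 128-bit lane; for example, 0xF1 multiplies all four pairs in each lane and writes the lane's sum to its lowest element. A sketch:

#include <immintrin.h>

/* Sketch: per-lane 4-element dot products of a and b.
   imm8 = 0xF1: high nibble 0xF selects all four products,
   low nibble 0x1 stores each lane's sum in that lane's element 0. */
__m256 lane_dot(__m256 a, __m256 b)
{
    return _mm256_dp_ps(a, b, 0xF1);
}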
+ + + + + Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[63:0] := a[127:64] + a[63:0] +dst[127:64] := b[127:64] + b[63:0] +dst[191:128] := a[255:192] + a[191:128] +dst[255:192] := b[255:192] + b[191:128] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[31:0] := a[63:32] + a[31:0] +dst[63:32] := a[127:96] + a[95:64] +dst[95:64] := b[63:32] + b[31:0] +dst[127:96] := b[127:96] + b[95:64] +dst[159:128] := a[191:160] + a[159:128] +dst[191:160] := a[255:224] + a[223:192] +dst[223:192] := b[191:160] + b[159:128] +dst[255:224] := b[255:224] + b[223:192] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
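Because the pairwise sums interleave a-pairs and b-pairs within each 128-bit lane as shown above, a full horizontal sum needs a second hadd plus a lane merge; a sketch:

#include <immintrin.h>

/* Sketch: sum all eight floats in v using the hadd ordering above. */
float hsum_ps(__m256 v)
{
    __m256 t = _mm256_hadd_ps(v, v);   /* pairwise sums, per lane     */
    t = _mm256_hadd_ps(t, t);          /* each lane now holds its sum */
    __m128 lo = _mm256_castps256_ps128(t);
    __m128 hi = _mm256_extractf128_ps(t, 1);
    return _mm_cvtss_f32(_mm_add_ss(lo, hi));
}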
+ + + + + Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[63:0] := a[63:0] - a[127:64] +dst[127:64] := b[63:0] - b[127:64] +dst[191:128] := a[191:128] - a[255:192] +dst[255:192] := b[191:128] - b[255:192] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[31:0] := a[31:0] - a[63:32] +dst[63:32] := a[95:64] - a[127:96] +dst[95:64] := b[31:0] - b[63:32] +dst[127:96] := b[95:64] - b[127:96] +dst[159:128] := a[159:128] - a[191:160] +dst[191:160] := a[223:192] - a[255:224] +dst[223:192] := b[159:128] - b[191:160] +dst[255:224] := b[223:192] - b[255:224] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] * b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] * b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Arithmetic +
+ + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "ZF" value. + +IF ((a[255:0] AND b[255:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[255:0]) AND b[255:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +RETURN ZF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "CF" value. + +IF ((a[255:0] AND b[255:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[255:0]) AND b[255:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +RETURN CF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +IF ((a[255:0] AND b[255:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[255:0]) AND b[255:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + AVX +
immintrin.h
+ Logical +
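The ZF/CF protocol above is what ptest-style branches compile to; a sketch using _mm256_testz_si256:

#include <immintrin.h>

/* Sketch: ZF is set iff v AND v is all zeros, i.e. v itself is zero. */
int is_all_zero(__m256i v)
{
    return _mm256_testz_si256(v, v);
}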
+ + + + + Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := ZF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := CF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := ZF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := CF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[63] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := ZF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := CF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +tmp[255:0] := a[255:0] AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[255:0] := (NOT a[255:0]) AND b[255:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ + tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := ZF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := CF + + + AVX +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +tmp[127:0] := a[127:0] AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + ZF := 1 +ELSE + ZF := 0 +FI +tmp[127:0] := (NOT a[127:0]) AND b[127:0] +IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + AVX +
immintrin.h
+ Logical +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF imm8[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF imm8[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF mask[i+63] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF mask[i+31] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
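+ Sketch of the immediate and variable blend forms above (assumed names
+ _mm256_blend_pd and _mm256_blendv_pd):
+
+ #include <immintrin.h>
+
+ /* Immediate blend: imm8 bit j selects b for element j (0xA = lanes 1 and 3). */
+ static inline __m256d mix_even_odd(__m256d a, __m256d b)
+ {
+     return _mm256_blend_pd(a, b, 0xA);
+ }
+
+ /* Variable blend: takes b wherever the mask element's sign bit is set. */
+ static inline __m256d select_by_sign(__m256d a, __m256d b, __m256d mask)
+ {
+     return _mm256_blendv_pd(a, b, mask);
+ }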
+ + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst". + +dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Extract 128 bits (composed of integer data) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Extract a 32-bit integer from "a", selected with "index", and store the result in "dst". + +dst[31:0] := (a[255:0] >> (index[2:0] * 32))[31:0] + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Extract a 64-bit integer from "a", selected with "index", and store the result in "dst". + +dst[63:0] := (a[255:0] >> (index[1:0] * 64))[63:0] + + AVX +
immintrin.h
+ Swizzle +
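+ The extract entries above in use (assumed names _mm256_extractf128_ps and
+ _mm256_extract_epi32; both selectors must be compile-time constants):
+
+ #include <immintrin.h>
+
+ /* imm8 = 1 selects bits 255:128, i.e. the upper 128-bit half. */
+ static inline __m128 upper_half(__m256 v)
+ {
+     return _mm256_extractf128_ps(v, 1);
+ }
+
+ /* 32-bit element 2, per dst[31:0] := (a >> (index*32))[31:0] above. */
+ static inline int third_lane(__m256i v)
+ {
+     return _mm256_extract_epi32(v, 2);
+ }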
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], b[1:0]) +dst[63:32] := SELECT4(a[127:0], b[33:32]) +dst[95:64] := SELECT4(a[127:0], b[65:64]) +dst[127:96] := SELECT4(a[127:0], b[97:96]) +dst[159:128] := SELECT4(a[255:128], b[129:128]) +dst[191:160] := SELECT4(a[255:128], b[161:160]) +dst[223:192] := SELECT4(a[255:128], b[193:192]) +dst[255:224] := SELECT4(a[255:128], b[225:224]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], b[1:0]) +dst[63:32] := SELECT4(a[127:0], b[33:32]) +dst[95:64] := SELECT4(a[127:0], b[65:64]) +dst[127:96] := SELECT4(a[127:0], b[97:96]) +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". + +IF (b[1] == 0) dst[63:0] := a[63:0]; FI +IF (b[1] == 1) dst[63:0] := a[127:64]; FI +IF (b[65] == 0) dst[127:64] := a[63:0]; FI +IF (b[65] == 1) dst[127:64] := a[127:64]; FI +IF (b[129] == 0) dst[191:128] := a[191:128]; FI +IF (b[129] == 1) dst[191:128] := a[255:192]; FI +IF (b[193] == 0) dst[255:192] := a[191:128]; FI +IF (b[193] == 1) dst[255:192] := a[255:192]; FI +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst". + +IF (b[1] == 0) dst[63:0] := a[63:0]; FI +IF (b[1] == 1) dst[63:0] := a[127:64]; FI +IF (b[65] == 0) dst[127:64] := a[63:0]; FI +IF (b[65] == 1) dst[127:64] := a[127:64]; FI +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) dst[255:192] := a[255:192]; FI +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst". + +IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Swizzle +
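+ A sketch of the immediate in-lane permute (assumed name _mm256_permute_ps);
+ imm8[2j+1:2j] picks the source element for destination element j in each
+ 128-bit lane, so _MM_SHUFFLE(0,1,2,3) reverses the four floats of a lane:
+
+ #include <immintrin.h>
+
+ static inline __m256 reverse_within_lanes(__m256 v)
+ {
+     return _mm256_permute_ps(v, _MM_SHUFFLE(0, 1, 2, 3));
+ }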
+ + + + + + Shuffle 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src1, src2, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src1[127:0] + 1: tmp[127:0] := src1[255:128] + 2: tmp[127:0] := src2[127:0] + 3: tmp[127:0] := src2[255:128] + ESAC + IF control[3] + tmp[127:0] := 0 + FI + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) +dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src1, src2, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src1[127:0] + 1: tmp[127:0] := src1[255:128] + 2: tmp[127:0] := src2[127:0] + 3: tmp[127:0] := src2[255:128] + ESAC + IF control[3] + tmp[127:0] := 0 + FI + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) +dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of integer data) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src1, src2, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src1[127:0] + 1: tmp[127:0] := src1[255:128] + 2: tmp[127:0] := src2[127:0] + 3: tmp[127:0] := src2[255:128] + ESAC + IF control[3] + tmp[127:0] := 0 + FI + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) +dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
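+ The 128-bit lane shuffle above (assumed name _mm256_permute2f128_si256) is
+ the usual way to swap the two halves of a 256-bit vector under AVX:
+
+ #include <immintrin.h>
+
+ /* imm8[1:0] = 1 picks a[255:128] for the low half; imm8[5:4] = 0 picks
+    a[127:0] for the high half. */
+ static inline __m256i swap_halves(__m256i a)
+ {
+     return _mm256_permute2f128_si256(a, a, 0x01);
+ }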
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE (imm8[0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE imm8[0] OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE (imm8[0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 8-bit integer "i" into "dst" at the location specified by "index". + +dst[255:0] := a[255:0] +sel := index[4:0]*8 +dst[sel+7:sel] := i[7:0] + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "index". + +dst[255:0] := a[255:0] +sel := index[3:0]*16 +dst[sel+15:sel] := i[15:0] + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 32-bit integer "i" into "dst" at the location specified by "index". + +dst[255:0] := a[255:0] +sel := index[2:0]*32 +dst[sel+31:sel] := i[31:0] + + AVX +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 64-bit integer "i" into "dst" at the location specified by "index". + +dst[255:0] := a[255:0] +sel := index[1:0]*64 +dst[sel+63:sel] := i[63:0] + + AVX +
immintrin.h
+ Swizzle +
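+ Building a 256-bit vector from two 128-bit halves with the insert entry
+ above (assumed names _mm256_insertf128_ps and _mm256_castps128_ps256, the
+ latter described further down among the Cast entries):
+
+ #include <immintrin.h>
+
+ static inline __m256 combine_ps(__m128 lo, __m128 hi)
+ {
+     /* The cast supplies the low half (upper bits undefined), then the
+        insert overwrites bits 255:128 with hi. */
+     return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1);
+ }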
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Swizzle +
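+ The unpack entries interleave within each 128-bit lane, not across the
+ whole vector; a sketch (assumed names _mm256_unpacklo_pd/_mm256_unpackhi_pd):
+
+ #include <immintrin.h>
+
+ /* lo = {a0,b0 | a2,b2}, hi = {a1,b1 | a3,b3} -- the lanes never mix. */
+ static inline void interleave_pd(__m256d a, __m256d b,
+                                  __m256d *lo, __m256d *hi)
+ {
+     *lo = _mm256_unpacklo_pd(a, b);
+     *hi = _mm256_unpackhi_pd(a, b);
+ }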
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + Round the packed double-precision (64-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed double-precision floating-point elements in "dst". + [round_note] + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ROUND(a[i+63:i], rounding) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + + Round the packed single-precision (32-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed single-precision floating-point elements in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ROUND(a[i+31:i], rounding) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := FLOOR(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := CEIL(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := FLOOR(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := CEIL(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Special Math Functions +
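+ Two common uses of the entries above, sketched with the assumed names
+ _mm256_min_ps/_mm256_max_ps and _mm256_round_pd:
+
+ #include <immintrin.h>
+
+ /* Clamp x to [lo, hi] lane-wise. */
+ static inline __m256 clamp_ps(__m256 x, __m256 lo, __m256 hi)
+ {
+     return _mm256_min_ps(_mm256_max_ps(x, lo), hi);
+ }
+
+ /* Round to nearest without raising the precision exception. */
+ static inline __m256d round_nearest_pd(__m256d x)
+ {
+     return _mm256_round_pd(x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ }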
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ( a[i+63:i] OP b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ( a[i+63:i] OP b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] OP b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ( a[i+31:i] OP b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +dst[63:0] := ( a[63:0] OP b[63:0] ) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Compare +
+ + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +dst[31:0] := ( a[31:0] OP b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Compare +
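+ The all-ones/all-zeros lane masks produced by the compare entries feed
+ directly into a variable blend; a sketch with the assumed names
+ _mm256_cmp_pd and _mm256_blendv_pd:
+
+ #include <immintrin.h>
+
+ /* Per-lane maximum via compare+blend: _CMP_GE_OQ (ordered, non-signaling)
+    yields all-ones lanes where a >= b, which then select a over b. */
+ static inline __m256d take_larger(__m256d a, __m256d b)
+ {
+     __m256d ge = _mm256_cmp_pd(a, b, _CMP_GE_OQ);
+     return _mm256_blendv_pd(b, a, ge);
+ }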
+ + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + m := j*64 + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 32*j + dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Convert +
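+ The difference between the rounding and truncating conversions above,
+ sketched (assumed names _mm256_cvtps_epi32 and _mm256_cvttps_epi32):
+
+ #include <immintrin.h>
+
+ /* Rounds per MXCSR (round-to-nearest-even by default). */
+ static inline __m256i to_int_nearest(__m256 x) { return _mm256_cvtps_epi32(x); }
+
+ /* Truncates toward zero, matching a C cast. */
+ static inline __m256i to_int_trunc(__m256 x) { return _mm256_cvttps_epi32(x); }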
+ + + + Copy the lower single-precision (32-bit) floating-point element of "a" to "dst". + +dst[31:0] := a[31:0] + + + AVX +
immintrin.h
+ Convert +
+ + + + Copy the lower double-precision (64-bit) floating-point element of "a" to "dst". + +dst[63:0] := a[63:0] + + + AVX +
immintrin.h
+ Convert +
+ + + + Copy the lower 32-bit integer in "a" to "dst". + +dst[31:0] := a[31:0] + + + AVX +
immintrin.h
+ Convert +
+ + + + Zero the contents of all XMM or YMM registers. + YMM0[MAX:0] := 0 +YMM1[MAX:0] := 0 +YMM2[MAX:0] := 0 +YMM3[MAX:0] := 0 +YMM4[MAX:0] := 0 +YMM5[MAX:0] := 0 +YMM6[MAX:0] := 0 +YMM7[MAX:0] := 0 +IF _64_BIT_MODE + YMM8[MAX:0] := 0 + YMM9[MAX:0] := 0 + YMM10[MAX:0] := 0 + YMM11[MAX:0] := 0 + YMM12[MAX:0] := 0 + YMM13[MAX:0] := 0 + YMM14[MAX:0] := 0 + YMM15[MAX:0] := 0 +FI + + + AVX +
immintrin.h
+ General Support +
+ + + + Zero the upper 128 bits of all YMM registers; the lower 128 bits of the registers are unmodified. + YMM0[MAX:128] := 0
+YMM1[MAX:128] := 0
+YMM2[MAX:128] := 0
+YMM3[MAX:128] := 0
+YMM4[MAX:128] := 0
+YMM5[MAX:128] := 0
+YMM6[MAX:128] := 0
+YMM7[MAX:128] := 0
+IF _64_BIT_MODE
+	YMM8[MAX:128] := 0
+	YMM9[MAX:128] := 0
+	YMM10[MAX:128] := 0
+	YMM11[MAX:128] := 0
+	YMM12[MAX:128] := 0
+	YMM13[MAX:128] := 0
+	YMM14[MAX:128] := 0
+	YMM15[MAX:128] := 0
+FI
+ + AVX +
immintrin.h
+ General Support +
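+ A typical use of the zero-upper entry (assumed name _mm256_zeroupper):
+ clearing the upper halves before returning to code that may execute
+ legacy SSE instructions avoids SSE/AVX transition penalties. Compilers
+ targeting AVX generally insert this at function boundaries themselves,
+ so the explicit call is a sketch, not a requirement:
+
+ #include <immintrin.h>
+
+ void finish_avx_block(void)
+ {
+     /* ... 256-bit AVX work ... */
+     _mm256_zeroupper();
+ }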
+ + + + Return vector of type __m256 with undefined elements. + AVX +
immintrin.h
+ General Support +
+ + + + Return vector of type __m256d with undefined elements. + AVX +
immintrin.h
+ General Support +
+ + + + Return vector of type __m256i with undefined elements. + AVX +
immintrin.h
+ General Support +
+ + + + Broadcast a single-precision (32-bit) floating-point element from memory to all elements of "dst". + +tmp[31:0] := MEM[mem_addr+31:mem_addr] +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := tmp[31:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + Swizzle + + + Broadcast a single-precision (32-bit) floating-point element from memory to all elements of "dst". + +tmp[31:0] := MEM[mem_addr+31:mem_addr] +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := tmp[31:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Load +
+ + Swizzle + + + Broadcast a double-precision (64-bit) floating-point element from memory to all elements of "dst". + +tmp[63:0] := MEM[mem_addr+63:mem_addr] +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := tmp[63:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + Swizzle + + + Broadcast 128 bits from memory (composed of 4 packed single-precision (32-bit) floating-point elements) to all elements of "dst". + +tmp[127:0] := MEM[mem_addr+127:mem_addr] +dst[127:0] := tmp[127:0] +dst[255:128] := tmp[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + Swizzle + + + Broadcast 128 bits from memory (composed of 2 packed double-precision (64-bit) floating-point elements) to all elements of "dst". + +tmp[127:0] := MEM[mem_addr+127:mem_addr] +dst[127:0] := tmp[127:0] +dst[255:128] := tmp[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits of integer data from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + Load 256-bits of integer data from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
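+ The aligned/unaligned load pairs above differ only in their address
+ requirement; a sketch (assumed names _mm256_load_ps/_mm256_loadu_ps):
+
+ #include <immintrin.h>
+
+ /* Works for any address. */
+ static inline __m256 load_any(const float *p) { return _mm256_loadu_ps(p); }
+
+ /* p must be 32-byte aligned, per the entry above. */
+ static inline __m256 load_aligned(const float *p) { return _mm256_load_ps(p); }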
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). + +FOR j := 0 to 3 + i := j*64 + IF mask[i+63] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). + +FOR j := 0 to 1 + i := j*64 + IF mask[i+63] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). + +FOR j := 0 to 7 + i := j*32 + IF mask[i+31] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). + +FOR j := 0 to 3 + i := j*32 + IF mask[i+31] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX +
immintrin.h
+ Load +
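+ The masked loads are handy for loop tails; a sketch (assumed names
+ _mm256_maskload_pd and _mm256_set_epi64x). The mask is built with
+ set_epi64x rather than an integer compare so it stays AVX-only:
+
+ #include <immintrin.h>
+
+ /* Load the first n (0..4) doubles of p, zeroing the remaining lanes;
+    the per-element high bit of the mask enables each load. */
+ static inline __m256d load_prefix_pd(const double *p, int n)
+ {
+     __m256i mask = _mm256_set_epi64x(n > 3 ? -1LL : 0, n > 2 ? -1LL : 0,
+                                      n > 1 ? -1LL : 0, n > 0 ? -1LL : 0);
+     return _mm256_maskload_pd(p, mask);
+ }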
+ + + + Load 256-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm256_loadu_si256" when the data crosses a cache line boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Load +
+ + + + + Load two 128-bit values (composed of 4 packed single-precision (32-bit) floating-point elements) from memory, and combine them into a 256-bit value in "dst". + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. + +dst[127:0] := MEM[loaddr+127:loaddr] +dst[255:128] := MEM[hiaddr+127:hiaddr] +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Load +
+ + + + + Load two 128-bit values (composed of 2 packed double-precision (64-bit) floating-point elements) from memory, and combine them into a 256-bit value in "dst". + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. + +dst[127:0] := MEM[loaddr+127:loaddr] +dst[255:128] := MEM[hiaddr+127:hiaddr] +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Load +
+ + + + + Load two 128-bit values (composed of integer data) from memory, and combine them into a 256-bit value in "dst". + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. + +dst[127:0] := MEM[loaddr+127:loaddr] +dst[255:128] := MEM[hiaddr+127:hiaddr] +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Load +
+ + + + + Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits of integer data from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits of integer data from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using "mask". + +FOR j := 0 to 3 + i := j*64 + IF mask[i+63] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using "mask". + +FOR j := 0 to 1 + i := j*64 + IF mask[i+63] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using "mask". + +FOR j := 0 to 7 + i := j*32 + IF mask[i+31] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using "mask". + +FOR j := 0 to 3 + i := j*32 + IF mask[i+31] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits of integer data from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX +
immintrin.h
+ Store +
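+ The non-temporal stores bypass the cache, so they pair with a store
+ fence; a sketch (assumed names _mm256_stream_ps, _mm256_loadu_ps and
+ _mm_sfence):
+
+ #include <immintrin.h>
+ #include <stddef.h>
+
+ /* dst must be 32-byte aligned and n a multiple of 8. */
+ void stream_copy(float *dst, const float *src, size_t n)
+ {
+     for (size_t i = 0; i < n; i += 8)
+         _mm256_stream_ps(dst + i, _mm256_loadu_ps(src + i));
+     _mm_sfence(); /* order the streaming stores before later accesses */
+ }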
+ + + + + + Store the high and low 128-bit halves (each composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into two different 128-bit memory locations. + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. +
+MEM[loaddr+127:loaddr] := a[127:0]
+MEM[hiaddr+127:hiaddr] := a[255:128]
+ + AVX +
immintrin.h
+ Store +
+ + + + + + Store the high and low 128-bit halves (each composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into two different 128-bit memory locations. + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. +
+MEM[loaddr+127:loaddr] := a[127:0]
+MEM[hiaddr+127:hiaddr] := a[255:128]
+ + AVX +
immintrin.h
+ Store +
+ + + + + + Store the high and low 128-bit halves (each composed of integer data) from "a" into two different 128-bit memory locations. + "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. +
+MEM[loaddr+127:loaddr] := a[127:0]
+MEM[hiaddr+127:hiaddr] := a[255:128]
+ + AVX +
immintrin.h
+ Store +
+ + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[63:32] +dst[63:32] := a[63:32] +dst[95:64] := a[127:96] +dst[127:96] := a[127:96] +dst[159:128] := a[191:160] +dst[191:160] := a[191:160] +dst[223:192] := a[255:224] +dst[255:224] := a[255:224] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Move +
+ + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[31:0] +dst[63:32] := a[31:0] +dst[95:64] := a[95:64] +dst[127:96] := a[95:64] +dst[159:128] := a[159:128] +dst[191:160] := a[159:128] +dst[223:192] := a[223:192] +dst[255:224] := a[223:192] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Move +
+ + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst". + +dst[63:0] := a[63:0] +dst[127:64] := a[63:0] +dst[191:128] := a[191:128] +dst[255:192] := a[191:128] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Move +
+ + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := 1.0 / a[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SQRT(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SQRT(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Elementary Math Functions +
+ + + + Set each bit of mask "dst" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in "a". + +FOR j := 0 to 3 + i := j*64 + IF a[i+63] + dst[j] := 1 + ELSE + dst[j] := 0 + FI +ENDFOR +dst[MAX:4] := 0 + + + AVX +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask "dst" based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in "a". + +FOR j := 0 to 7 + i := j*32 + IF a[i+31] + dst[j] := 1 + ELSE + dst[j] := 0 + FI +ENDFOR +dst[MAX:8] := 0 + + + AVX +
immintrin.h
+ Miscellaneous +
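+ movemask compresses the per-lane sign bits into an integer, which
+ combines naturally with the compare entries; a sketch (assumed names
+ _mm256_movemask_ps and _mm256_cmp_ps):
+
+ #include <immintrin.h>
+
+ /* Count lanes where a > b: compare to all-ones lanes, take the 8-bit
+    sign mask, then count its set bits. */
+ static inline int count_greater(__m256 a, __m256 b)
+ {
+     int bits = _mm256_movemask_ps(_mm256_cmp_ps(a, b, _CMP_GT_OQ));
+     int n = 0;
+     while (bits) { bits &= bits - 1; ++n; }
+     return n;
+ }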
+ + + + Return vector of type __m256d with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + Return vector of type __m256 with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + Return vector of type __m256i with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 +dst[191:128] := e2 +dst[255:192] := e3 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 +dst[159:128] := e4 +dst[191:160] := e5 +dst[223:192] := e6 +dst[255:224] := e7 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values. + +dst[7:0] := e0 +dst[15:8] := e1 +dst[23:16] := e2 +dst[31:24] := e3 +dst[39:32] := e4 +dst[47:40] := e5 +dst[55:48] := e6 +dst[63:56] := e7 +dst[71:64] := e8 +dst[79:72] := e9 +dst[87:80] := e10 +dst[95:88] := e11 +dst[103:96] := e12 +dst[111:104] := e13 +dst[119:112] := e14 +dst[127:120] := e15 +dst[135:128] := e16 +dst[143:136] := e17 +dst[151:144] := e18 +dst[159:152] := e19 +dst[167:160] := e20 +dst[175:168] := e21 +dst[183:176] := e22 +dst[191:184] := e23 +dst[199:192] := e24 +dst[207:200] := e25 +dst[215:208] := e26 +dst[223:216] := e27 +dst[231:224] := e28 +dst[239:232] := e29 +dst[247:240] := e30 +dst[255:248] := e31 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 16-bit integers in "dst" with the supplied values. + +dst[15:0] := e0 +dst[31:16] := e1 +dst[47:32] := e2 +dst[63:48] := e3 +dst[79:64] := e4 +dst[95:80] := e5 +dst[111:96] := e6 +dst[127:112] := e7 +dst[143:128] := e8 +dst[159:144] := e9 +dst[175:160] := e10 +dst[191:176] := e11 +dst[207:192] := e12 +dst[223:208] := e13 +dst[239:224] := e14 +dst[255:240] := e15 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed 32-bit integers in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 +dst[159:128] := e4 +dst[191:160] := e5 +dst[223:192] := e6 +dst[255:224] := e7 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + Set packed 64-bit integers in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 +dst[191:128] := e2 +dst[255:192] := e3 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[63:0] := e3 +dst[127:64] := e2 +dst[191:128] := e1 +dst[255:192] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[31:0] := e7 +dst[63:32] := e6 +dst[95:64] := e5 +dst[127:96] := e4 +dst[159:128] := e3 +dst[191:160] := e2 +dst[223:192] := e1 +dst[255:224] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values in reverse order. + +dst[7:0] := e31 +dst[15:8] := e30 +dst[23:16] := e29 +dst[31:24] := e28 +dst[39:32] := e27 +dst[47:40] := e26 +dst[55:48] := e25 +dst[63:56] := e24 +dst[71:64] := e23 +dst[79:72] := e22 +dst[87:80] := e21 +dst[95:88] := e20 +dst[103:96] := e19 +dst[111:104] := e18 +dst[119:112] := e17 +dst[127:120] := e16 +dst[135:128] := e15 +dst[143:136] := e14 +dst[151:144] := e13 +dst[159:152] := e12 +dst[167:160] := e11 +dst[175:168] := e10 +dst[183:176] := e9 +dst[191:184] := e8 +dst[199:192] := e7 +dst[207:200] := e6 +dst[215:208] := e5 +dst[223:216] := e4 +dst[231:224] := e3 +dst[239:232] := e2 +dst[247:240] := e1 +dst[255:248] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 16-bit integers in "dst" with the supplied values in reverse order. + +dst[15:0] := e15 +dst[31:16] := e14 +dst[47:32] := e13 +dst[63:48] := e12 +dst[79:64] := e11 +dst[95:80] := e10 +dst[111:96] := e9 +dst[127:112] := e8 +dst[143:128] := e7 +dst[159:144] := e6 +dst[175:160] := e5 +dst[191:176] := e4 +dst[207:192] := e3 +dst[223:208] := e2 +dst[239:224] := e1 +dst[255:240] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed 32-bit integers in "dst" with the supplied values in reverse order. + +dst[31:0] := e7 +dst[63:32] := e6 +dst[95:64] := e5 +dst[127:96] := e4 +dst[159:128] := e3 +dst[191:160] := e2 +dst[223:192] := e1 +dst[255:224] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + + + + Set packed 64-bit integers in "dst" with the supplied values in reverse order. + +dst[63:0] := e3 +dst[127:64] := e2 +dst[191:128] := e1 +dst[255:192] := e0 +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:256] := 0 + + AVX +
immintrin.h
+ Set +
+ + + + Broadcast 8-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastb" instruction. +
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := a[7:0]
+ENDFOR
+dst[MAX:256] := 0
+ + AVX +
immintrin.h
+ Set +
+ + + + Broadcast 16-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastw" instruction. +
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := a[15:0]
+ENDFOR
+dst[MAX:256] := 0
+ + AVX +
immintrin.h
+ Set +
+ + + + Broadcast 32-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastd" instruction. +
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:256] := 0
+ + AVX +
immintrin.h
+ Set +
+ + + + Broadcast 64-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastq" instruction. +
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:256] := 0
+ + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256 vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256d vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256i vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256 vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256d vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
+ + + + + Set packed __m256i vector "dst" with the supplied values. + +dst[127:0] := lo[127:0] +dst[255:128] := hi[127:0] +dst[MAX:256] := 0 + + + AVX +
immintrin.h
+ Set +
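The two groups of entries above differ only in parameter order, which was stripped along with the parameter lists: assuming they are the _mm256_set_m128* and _mm256_setr_m128* pairs, the set form takes (hi, lo) and the setr form takes (lo, hi). Sketch:

#include <immintrin.h>

__m256i join128(__m128i lo, __m128i hi) {
    /* both calls build the same vector: lo in bits 127:0, hi in bits 255:128 */
    __m256i a = _mm256_set_m128i(hi, lo);
    __m256i b = _mm256_setr_m128i(lo, hi);
    (void)b;
    return a;
}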
+ + + + Cast vector of type __m256d to type __m256. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256 to type __m256d. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256 to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256d to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256i to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256i to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m256; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128d to type __m256d; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m256i; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m256; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128d to type __m256d; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m256i; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX +
immintrin.h
+ Cast +
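Because these casts compile to nothing, they are the idiomatic way to apply integer bit tricks to floating-point data; note the contrast between the plain 128-to-256 casts (upper half undefined) and the last three entries (upper half zeroed). A sketch assuming _mm256_castps_si256 and _mm256_castsi256_ps, with an AVX2 integer AND-NOT in the middle:

#include <immintrin.h>

/* Absolute value of 8 floats: clear each sign bit with integer ops. */
__m256 abs8(__m256 x) {
    __m256i sign = _mm256_set1_epi32((int)0x80000000u);
    __m256i bits = _mm256_castps_si256(x);    /* free: no instruction emitted */
    bits = _mm256_andnot_si256(sign, bits);   /* bits := (NOT sign) AND bits */
    return _mm256_castsi256_ps(bits);
}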
+ + + + + + + Extract an 8-bit integer from "a", selected with "index", and store the result in "dst". + +dst[7:0] := (a[255:0] >> (index[4:0] * 8))[7:0] + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Extract a 16-bit integer from "a", selected with "index", and store the result in "dst". + +dst[15:0] := (a[255:0] >> (index[3:0] * 16))[15:0] + + AVX2 +
immintrin.h
+ Swizzle +
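In both extract entries, "index" must be a compile-time constant and the element comes back as a scalar. Sketch assuming _mm256_extract_epi16:

#include <immintrin.h>

int third_word(__m256i v) {
    /* index is an immediate in [0, 15]; selects 16-bit element 3 */
    return _mm256_extract_epi16(v, 3);
}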
+ + + + + + Blend packed 16-bit integers from "a" and "b" within 128-bit lanes using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF imm8[j%8] + dst[i+15:i] := b[i+15:i] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 32-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF imm8[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 32-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF imm8[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 8-bit integers from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + IF mask[i+7] + dst[i+7:i] := b[i+7:i] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
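The immediate blends choose elements at compile time; the blendv form above chooses per element at run time from the mask's most significant bits. Sketch assuming _mm256_blend_epi32 and _mm256_blendv_epi8:

#include <immintrin.h>

__m256i mix(__m256i a, __m256i b) {
    /* imm8 = 0x11: take b for elements 0 and 4, a elsewhere */
    return _mm256_blend_epi32(a, b, 0x11);
}

__m256i select_bytes(__m256i a, __m256i b, __m256i mask) {
    /* per byte: if the mask byte's top bit is set, take b's byte */
    return _mm256_blendv_epi8(a, b, mask);
}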
+ + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast 128 bits of integer data from "a" to all 128-bit lanes in "dst". + +dst[127:0] := a[127:0] +dst[255:128] := a[127:0] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast 128 bits of integer data from "a" to all 128-bit lanes in "dst". + +dst[127:0] := a[127:0] +dst[255:128] := a[127:0] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := a[15:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := a[15:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
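Unlike the set1 family, these broadcasts read the low element of an existing vector register, avoiding a round trip through a scalar. Sketch assuming _mm256_broadcastd_epi32:

#include <immintrin.h>

__m256i splat_low_dword(__m128i v) {
    /* copy v's low 32-bit element into all 8 lanes of a 256-bit vector */
    return _mm256_broadcastd_epi32(v);
}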
+ + + + + Extract 128 bits (composed of integer data) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of integer data) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE (imm8[0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
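Extracting or inserting a 128-bit half is the usual way to move between __m128i and __m256i code. Sketch assuming _mm256_extracti128_si256 and _mm256_inserti128_si256 (imm8 must be a constant 0 or 1):

#include <immintrin.h>

__m128i high_half(__m256i v) {
    return _mm256_extracti128_si256(v, 1);    /* bits 255:128 */
}

__m256i replace_high(__m256i v, __m128i h) {
    return _mm256_inserti128_si256(v, h, 1);  /* keep low half, insert h high */
}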
+ + + + + + Shuffle 128-bits (composed of integer data) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src1, src2, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src1[127:0] + 1: tmp[127:0] := src1[255:128] + 2: tmp[127:0] := src2[127:0] + 3: tmp[127:0] := src2[255:128] + ESAC + IF control[3] + tmp[127:0] := 0 + FI + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) +dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
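These two are among the few AVX2 shuffles that move 64-bit elements freely across the 128-bit lane boundary. Sketch assuming _mm256_permute4x64_epi64:

#include <immintrin.h>

__m256i reverse_qwords(__m256i v) {
    /* 2-bit fields select source qwords: imm8 = 0b00011011 -> order 3,2,1,0 */
    return _mm256_permute4x64_epi64(v, 0x1B);
}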
+ + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst".

FOR j := 0 to 7
	i := j*32
	id := idx[i+2:i]*32
	dst[i+31:i] := a[id+31:id]
ENDFOR
dst[MAX:256] := 0


   AVX2
immintrin.h
+ Swizzle +
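This pair is the fully variable cross-lane dword shuffle: the indices come from a register rather than an immediate. Sketch assuming _mm256_permutevar8x32_epi32:

#include <immintrin.h>

__m256i reverse_dwords(__m256i v) {
    __m256i idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
    return _mm256_permutevar8x32_epi32(v, idx);
}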
+ + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 8-bit integers in "a" within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[3:0] := b[i+3:i] + dst[i+7:i] := a[index*8+7:index*8] + FI + IF b[128+i+7] == 1 + dst[128+i+7:128+i] := 0 + ELSE + index[3:0] := b[128+i+3:128+i] + dst[128+i+7:128+i] := a[128+index*8+7:128+index*8] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
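Note that the byte shuffle works within each 128-bit lane independently, and a set top bit in a control byte zeroes the destination byte. Sketch assuming _mm256_shuffle_epi8:

#include <immintrin.h>

__m256i reverse_bytes_per_lane(__m256i v) {
    /* indices apply per 128-bit lane, so the pattern is repeated twice */
    __m256i idx = _mm256_setr_epi8(
        15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
        15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    return _mm256_shuffle_epi8(v, idx);
}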
+ + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst".

dst[63:0] := a[63:0]
dst[79:64] := (a >> (imm8[1:0] * 16))[79:64]
dst[95:80] := (a >> (imm8[3:2] * 16))[79:64]
dst[111:96] := (a >> (imm8[5:4] * 16))[79:64]
dst[127:112] := (a >> (imm8[7:6] * 16))[79:64]
dst[191:128] := a[191:128]
dst[207:192] := (a >> (imm8[1:0] * 16))[207:192]
dst[223:208] := (a >> (imm8[3:2] * 16))[207:192]
dst[239:224] := (a >> (imm8[5:4] * 16))[207:192]
dst[255:240] := (a >> (imm8[7:6] * 16))[207:192]
dst[MAX:256] := 0


   AVX2
immintrin.h
+ Swizzle +
+ + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst".

dst[15:0] := (a >> (imm8[1:0] * 16))[15:0]
dst[31:16] := (a >> (imm8[3:2] * 16))[15:0]
dst[47:32] := (a >> (imm8[5:4] * 16))[15:0]
dst[63:48] := (a >> (imm8[7:6] * 16))[15:0]
dst[127:64] := a[127:64]
dst[143:128] := (a >> (imm8[1:0] * 16))[143:128]
dst[159:144] := (a >> (imm8[3:2] * 16))[143:128]
dst[175:160] := (a >> (imm8[5:4] * 16))[143:128]
dst[191:176] := (a >> (imm8[7:6] * 16))[143:128]
dst[255:192] := a[255:192]
dst[MAX:256] := 0


   AVX2
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Swizzle +
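Like the pack instructions later in this file, the unpacks interleave within each 128-bit lane, so zipping two whole 256-bit vectors needs a lane fix-up afterwards. Sketch assuming _mm256_unpacklo_epi8/_mm256_unpackhi_epi8 plus _mm256_permute2x128_si256:

#include <immintrin.h>

/* Fully interleave the bytes of a and b across both outputs. */
void zip_bytes(__m256i a, __m256i b, __m256i *out_lo, __m256i *out_hi) {
    __m256i lo = _mm256_unpacklo_epi8(a, b);            /* zip quarters 0 and 2 */
    __m256i hi = _mm256_unpackhi_epi8(a, b);            /* zip quarters 1 and 3 */
    *out_lo = _mm256_permute2x128_si256(lo, hi, 0x20);  /* quarters 0, 1 */
    *out_hi = _mm256_permute2x128_si256(lo, hi, 0x31);  /* quarters 2, 3 */
}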
+ + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := ABS(a[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ABS(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ABS(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Special Math Functions +
+ + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := a[i+7:i] + b[i+7:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := a[i+15:i] + b[i+15:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
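The saturating forms clamp at the type's limits instead of wrapping, which is usually what pixel and audio code wants. Sketch assuming _mm256_adds_epu8 (compile with -mavx2):

#include <immintrin.h>

/* Brighten 32 pixels at once; values clamp at 255 instead of wrapping. */
void brighten32(unsigned char *px, unsigned char amount) {
    __m256i v = _mm256_loadu_si256((const __m256i *)px);
    __m256i d = _mm256_set1_epi8((char)amount);
    _mm256_storeu_si256((__m256i *)px, _mm256_adds_epu8(v, d));
}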
+ + + + + Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[31:16] + a[15:0] +dst[31:16] := a[63:48] + a[47:32] +dst[47:32] := a[95:80] + a[79:64] +dst[63:48] := a[127:112] + a[111:96] +dst[79:64] := b[31:16] + b[15:0] +dst[95:80] := b[63:48] + b[47:32] +dst[111:96] := b[95:80] + b[79:64] +dst[127:112] := b[127:112] + b[111:96] +dst[143:128] := a[159:144] + a[143:128] +dst[159:144] := a[191:176] + a[175:160] +dst[175:160] := a[223:208] + a[207:192] +dst[191:176] := a[255:240] + a[239:224] +dst[207:192] := b[159:144] + b[143:128] +dst[223:208] := b[191:176] + b[175:160] +dst[239:224] := b[223:208] + b[207:192] +dst[255:240] := b[255:240] + b[239:224] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[63:32] + a[31:0] +dst[63:32] := a[127:96] + a[95:64] +dst[95:64] := b[63:32] + b[31:0] +dst[127:96] := b[127:96] + b[95:64] +dst[159:128] := a[191:160] + a[159:128] +dst[191:160] := a[255:224] + a[223:192] +dst[223:192] := b[191:160] + b[159:128] +dst[255:224] := b[255:224] + b[223:192] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[31:16] + a[15:0]) +dst[31:16] := Saturate16(a[63:48] + a[47:32]) +dst[47:32] := Saturate16(a[95:80] + a[79:64]) +dst[63:48] := Saturate16(a[127:112] + a[111:96]) +dst[79:64] := Saturate16(b[31:16] + b[15:0]) +dst[95:80] := Saturate16(b[63:48] + b[47:32]) +dst[111:96] := Saturate16(b[95:80] + b[79:64]) +dst[127:112] := Saturate16(b[127:112] + b[111:96]) +dst[143:128] := Saturate16(a[159:144] + a[143:128]) +dst[159:144] := Saturate16(a[191:176] + a[175:160]) +dst[175:160] := Saturate16(a[223:208] + a[207:192]) +dst[191:176] := Saturate16(a[255:240] + a[239:224]) +dst[207:192] := Saturate16(b[159:144] + b[143:128]) +dst[223:208] := Saturate16(b[191:176] + b[175:160]) +dst[239:224] := Saturate16(b[223:208] + b[207:192]) +dst[255:240] := Saturate16(b[255:240] + b[239:224]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
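Note the output order of the horizontal adds: a-pairs and b-pairs alternate within each 128-bit lane, so a full horizontal reduction takes repeated hadds plus a final cross-lane add. Sketch assuming _mm256_hadd_epi32:

#include <immintrin.h>

/* Sum all eight 32-bit elements of v. */
int hsum_epi32(__m256i v) {
    v = _mm256_hadd_epi32(v, v);                      /* 4 pair sums per lane, duplicated */
    v = _mm256_hadd_epi32(v, v);                      /* 1 lane sum per lane, duplicated */
    __m128i lo = _mm256_castsi256_si128(v);
    __m128i hi = _mm256_extracti128_si256(v, 1);
    return _mm_cvtsi128_si32(_mm_add_epi32(lo, hi));  /* add the two lane sums */
}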
+ + + + + Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[15:0] - a[31:16] +dst[31:16] := a[47:32] - a[63:48] +dst[47:32] := a[79:64] - a[95:80] +dst[63:48] := a[111:96] - a[127:112] +dst[79:64] := b[15:0] - b[31:16] +dst[95:80] := b[47:32] - b[63:48] +dst[111:96] := b[79:64] - b[95:80] +dst[127:112] := b[111:96] - b[127:112] +dst[143:128] := a[143:128] - a[159:144] +dst[159:144] := a[175:160] - a[191:176] +dst[175:160] := a[207:192] - a[223:208] +dst[191:176] := a[239:224] - a[255:240] +dst[207:192] := b[143:128] - b[159:144] +dst[223:208] := b[175:160] - b[191:176] +dst[239:224] := b[207:192] - b[223:208] +dst[255:240] := b[239:224] - b[255:240] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[31:0] - a[63:32] +dst[63:32] := a[95:64] - a[127:96] +dst[95:64] := b[31:0] - b[63:32] +dst[127:96] := b[95:64] - b[127:96] +dst[159:128] := a[159:128] - a[191:160] +dst[191:160] := a[223:192] - a[255:224] +dst[223:192] := b[159:128] - b[191:160] +dst[255:224] := b[223:192] - b[255:224] +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[15:0] - a[31:16]) +dst[31:16] := Saturate16(a[47:32] - a[63:48]) +dst[47:32] := Saturate16(a[79:64] - a[95:80]) +dst[63:48] := Saturate16(a[111:96] - a[127:112]) +dst[79:64] := Saturate16(b[15:0] - b[31:16]) +dst[95:80] := Saturate16(b[47:32] - b[63:48]) +dst[111:96] := Saturate16(b[79:64] - b[95:80]) +dst[127:112] := Saturate16(b[111:96] - b[127:112]) +dst[143:128] := Saturate16(a[143:128] - a[159:144]) +dst[159:144] := Saturate16(a[175:160] - a[191:176]) +dst[175:160] := Saturate16(a[207:192] - a[223:208]) +dst[191:176] := Saturate16(a[239:224] - a[255:240]) +dst[207:192] := Saturate16(b[143:128] - b[159:144]) +dst[223:208] := Saturate16(b[175:160] - b[191:176]) +dst[239:224] := Saturate16(b[207:192] - b[223:208]) +dst[255:240] := Saturate16(b[239:224] - b[255:240]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
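These two multiply-add entries are the standard building blocks for small-integer dot products. A sketch chaining them, assuming _mm256_maddubs_epi16 and _mm256_madd_epi16:

#include <immintrin.h>

/* Dot-product step: u8 * s8 pairs -> i16, then i16 pairs -> 8 x i32. */
__m256i dot_u8s8(__m256i u8, __m256i s8) {
    __m256i p16 = _mm256_maddubs_epi16(u8, s8);           /* pairwise products, saturated */
    return _mm256_madd_epi16(p16, _mm256_set1_epi16(1));  /* widen pairs to i32 */
}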
+ + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+31:i] * b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst". + +FOR j := 0 to 7 + i := j*32 + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
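Pairing the low-half and high-half multiplies reconstructs full-width products. Sketch assuming _mm256_mullo_epi16 and _mm256_mulhi_epi16:

#include <immintrin.h>

/* Full 32-bit products of the low 16-bit pairs within each lane. */
__m256i full_products_lo(__m256i a, __m256i b) {
    __m256i lo = _mm256_mullo_epi16(a, b);  /* low 16 bits of each product */
    __m256i hi = _mm256_mulhi_epi16(a, b);  /* high 16 bits of each product */
    return _mm256_unpacklo_epi16(lo, hi);   /* interleave into 32-bit values */
}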
+ + + + + Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce four unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst". + +FOR j := 0 to 31 + i := j*8 + tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) +ENDFOR +FOR j := 0 to 3 + i := j*64 + dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \ + tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56] + dst[i+63:i+16] := 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
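With an all-zero second operand, the SAD entry doubles as a cheap horizontal byte accumulator. Sketch assuming _mm256_sad_epu8:

#include <immintrin.h>

/* Sum 32 unsigned bytes into four 64-bit partial sums (one per qword). */
__m256i byte_sums(__m256i v) {
    return _mm256_sad_epu8(v, _mm256_setzero_si256());
}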
+ + + + + Negate packed signed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.

FOR j := 0 to 31
	i := j*8
	IF b[i+7:i] < 0
		dst[i+7:i] := -(a[i+7:i])
	ELSE IF b[i+7:i] == 0
		dst[i+7:i] := 0
	ELSE
		dst[i+7:i] := a[i+7:i]
	FI
ENDFOR
dst[MAX:256] := 0


   AVX2
immintrin.h
+ Arithmetic +
+ + + + + Negate packed signed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.

FOR j := 0 to 15
	i := j*16
	IF b[i+15:i] < 0
		dst[i+15:i] := -(a[i+15:i])
	ELSE IF b[i+15:i] == 0
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := a[i+15:i]
	FI
ENDFOR
dst[MAX:256] := 0


   AVX2
immintrin.h
+ Arithmetic +
+ + + + + Negate packed signed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.

FOR j := 0 to 7
	i := j*32
	IF b[i+31:i] < 0
		dst[i+31:i] := -(a[i+31:i])
	ELSE IF b[i+31:i] == 0
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := a[i+31:i]
	FI
ENDFOR
dst[MAX:256] := 0


   AVX2
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := a[i+7:i] - b[i+7:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := a[i+15:i] - b[i+15:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Arithmetic +
+ + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". + +FOR j := 0 to 1 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + dst[i+127:i] := tmp[127:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[j] := a[i+7] +ENDFOR + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst".
	Eight SADs are performed for each 128-bit lane using one quadruplet from "b" and eight quadruplets from "a". One quadruplet is selected from "b" starting at the offset specified in "imm8". Eight quadruplets are formed from sequential 8-bit integers selected from "a" starting at the offset specified in "imm8".

DEFINE MPSADBW(a[127:0], b[127:0], imm8[2:0]) {
	a_offset := imm8[2]*32
	b_offset := imm8[1:0]*32
	FOR j := 0 to 7
		i := j*8
		k := a_offset+i
		l := b_offset
		tmp[i*2+15:i*2] := ABS(Signed(a[k+7:k] - b[l+7:l])) + ABS(Signed(a[k+15:k+8] - b[l+15:l+8])) + \
		                   ABS(Signed(a[k+23:k+16] - b[l+23:l+16])) + ABS(Signed(a[k+31:k+24] - b[l+31:l+24]))
	ENDFOR
	RETURN tmp[127:0]
}
dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0])
dst[255:128] := MPSADBW(a[255:128], b[255:128], imm8[5:3])
dst[MAX:256] := 0


   AVX2
immintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". + +dst[7:0] := Saturate8(a[15:0]) +dst[15:8] := Saturate8(a[31:16]) +dst[23:16] := Saturate8(a[47:32]) +dst[31:24] := Saturate8(a[63:48]) +dst[39:32] := Saturate8(a[79:64]) +dst[47:40] := Saturate8(a[95:80]) +dst[55:48] := Saturate8(a[111:96]) +dst[63:56] := Saturate8(a[127:112]) +dst[71:64] := Saturate8(b[15:0]) +dst[79:72] := Saturate8(b[31:16]) +dst[87:80] := Saturate8(b[47:32]) +dst[95:88] := Saturate8(b[63:48]) +dst[103:96] := Saturate8(b[79:64]) +dst[111:104] := Saturate8(b[95:80]) +dst[119:112] := Saturate8(b[111:96]) +dst[127:120] := Saturate8(b[127:112]) +dst[135:128] := Saturate8(a[143:128]) +dst[143:136] := Saturate8(a[159:144]) +dst[151:144] := Saturate8(a[175:160]) +dst[159:152] := Saturate8(a[191:176]) +dst[167:160] := Saturate8(a[207:192]) +dst[175:168] := Saturate8(a[223:208]) +dst[183:176] := Saturate8(a[239:224]) +dst[191:184] := Saturate8(a[255:240]) +dst[199:192] := Saturate8(b[143:128]) +dst[207:200] := Saturate8(b[159:144]) +dst[215:208] := Saturate8(b[175:160]) +dst[223:216] := Saturate8(b[191:176]) +dst[231:224] := Saturate8(b[207:192]) +dst[239:232] := Saturate8(b[223:208]) +dst[247:240] := Saturate8(b[239:224]) +dst[255:248] := Saturate8(b[255:240]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". + +dst[15:0] := Saturate16(a[31:0]) +dst[31:16] := Saturate16(a[63:32]) +dst[47:32] := Saturate16(a[95:64]) +dst[63:48] := Saturate16(a[127:96]) +dst[79:64] := Saturate16(b[31:0]) +dst[95:80] := Saturate16(b[63:32]) +dst[111:96] := Saturate16(b[95:64]) +dst[127:112] := Saturate16(b[127:96]) +dst[143:128] := Saturate16(a[159:128]) +dst[159:144] := Saturate16(a[191:160]) +dst[175:160] := Saturate16(a[223:192]) +dst[191:176] := Saturate16(a[255:224]) +dst[207:192] := Saturate16(b[159:128]) +dst[223:208] := Saturate16(b[191:160]) +dst[239:224] := Saturate16(b[223:192]) +dst[255:240] := Saturate16(b[255:224]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". + +dst[7:0] := SaturateU8(a[15:0]) +dst[15:8] := SaturateU8(a[31:16]) +dst[23:16] := SaturateU8(a[47:32]) +dst[31:24] := SaturateU8(a[63:48]) +dst[39:32] := SaturateU8(a[79:64]) +dst[47:40] := SaturateU8(a[95:80]) +dst[55:48] := SaturateU8(a[111:96]) +dst[63:56] := SaturateU8(a[127:112]) +dst[71:64] := SaturateU8(b[15:0]) +dst[79:72] := SaturateU8(b[31:16]) +dst[87:80] := SaturateU8(b[47:32]) +dst[95:88] := SaturateU8(b[63:48]) +dst[103:96] := SaturateU8(b[79:64]) +dst[111:104] := SaturateU8(b[95:80]) +dst[119:112] := SaturateU8(b[111:96]) +dst[127:120] := SaturateU8(b[127:112]) +dst[135:128] := SaturateU8(a[143:128]) +dst[143:136] := SaturateU8(a[159:144]) +dst[151:144] := SaturateU8(a[175:160]) +dst[159:152] := SaturateU8(a[191:176]) +dst[167:160] := SaturateU8(a[207:192]) +dst[175:168] := SaturateU8(a[223:208]) +dst[183:176] := SaturateU8(a[239:224]) +dst[191:184] := SaturateU8(a[255:240]) +dst[199:192] := SaturateU8(b[143:128]) +dst[207:200] := SaturateU8(b[159:144]) +dst[215:208] := SaturateU8(b[175:160]) +dst[223:216] := SaturateU8(b[191:176]) +dst[231:224] := SaturateU8(b[207:192]) +dst[239:232] := SaturateU8(b[223:208]) +dst[247:240] := SaturateU8(b[239:224]) +dst[255:248] := SaturateU8(b[255:240]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst". + +dst[15:0] := SaturateU16(a[31:0]) +dst[31:16] := SaturateU16(a[63:32]) +dst[47:32] := SaturateU16(a[95:64]) +dst[63:48] := SaturateU16(a[127:96]) +dst[79:64] := SaturateU16(b[31:0]) +dst[95:80] := SaturateU16(b[63:32]) +dst[111:96] := SaturateU16(b[95:64]) +dst[127:112] := SaturateU16(b[127:96]) +dst[143:128] := SaturateU16(a[159:128]) +dst[159:144] := SaturateU16(a[191:160]) +dst[175:160] := SaturateU16(a[223:192]) +dst[191:176] := SaturateU16(a[255:224]) +dst[207:192] := SaturateU16(b[159:128]) +dst[223:208] := SaturateU16(b[191:160]) +dst[239:224] := SaturateU16(b[223:192]) +dst[255:240] := SaturateU16(b[255:224]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Miscellaneous +
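As the pseudocode shows, the packs narrow per 128-bit lane, so the output order is a-low, b-low, a-high, b-high; a cross-lane permute restores logical order. Sketch assuming _mm256_packs_epi32 plus _mm256_permute4x64_epi64:

#include <immintrin.h>

/* Narrow 16 x i32 (in a then b) to 16 x i16 in logical order. */
__m256i narrow32to16(__m256i a, __m256i b) {
    __m256i p = _mm256_packs_epi32(a, b);      /* lanes: a0-3, b0-3, a4-7, b4-7 */
    return _mm256_permute4x64_epi64(p, 0xD8);  /* 0b11011000: qword order 0,2,1,3 */
}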
+ + + + + Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[255:0] := (a[255:0] AND b[255:0]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 256 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". + +dst[255:0] := ((NOT a[255:0]) AND b[255:0]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of 256 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[255:0] := (a[255:0] OR b[255:0]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of 256 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[255:0] := (a[255:0] XOR b[255:0]) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Logical +
+ + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Probability/Statistics +
+ + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Probability/Statistics +
+ + + + + Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed 64-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ( a[i+63:i] == b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ( a[i+63:i] > b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Compare +
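The compares produce all-ones or all-zero elements; combined with the byte movemask above they make a fast memchr-style scan. Sketch assuming _mm256_cmpeq_epi8 and _mm256_movemask_epi8 (__builtin_ctz is a GCC/Clang builtin):

#include <immintrin.h>

/* Index of the first byte equal to c in a 32-byte block, or -1. */
int find_byte(const unsigned char *p, unsigned char c) {
    __m256i v = _mm256_loadu_si256((const __m256i *)p);
    __m256i m = _mm256_cmpeq_epi8(v, _mm256_set1_epi8((char)c));
    int bits = _mm256_movemask_epi8(m);  /* bit j set iff p[j] == c */
    return bits ? __builtin_ctz(bits) : -1;
}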
+ + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j:= 0 to 7 + i := 32*j + k := 16*j + dst[i+31:i] := SignExtend32(a[k+15:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j:= 0 to 3 + i := 64*j + k := 16*j + dst[i+63:i] := SignExtend64(a[k+15:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j:= 0 to 3 + i := 64*j + k := 32*j + dst[i+63:i] := SignExtend64(a[k+31:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + l := j*16 + dst[l+15:l] := SignExtend16(a[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 8*j + dst[i+31:i] := SignExtend32(a[k+7:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 8*j + dst[i+63:i] := SignExtend64(a[k+7:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 16*j + dst[i+31:i] := ZeroExtend32(a[k+15:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j:= 0 to 3 + i := 64*j + k := 16*j + dst[i+63:i] := ZeroExtend64(a[k+15:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j:= 0 to 3 + i := 64*j + k := 32*j + dst[i+63:i] := ZeroExtend64(a[k+31:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + l := j*16 + dst[l+15:l] := ZeroExtend16(a[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 8*j + dst[i+31:i] := ZeroExtend32(a[k+7:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst".

FOR j := 0 to 3
	i := 64*j
	k := 8*j
	dst[i+63:i] := ZeroExtend64(a[k+7:k])
ENDFOR
dst[MAX:256] := 0


   AVX2
immintrin.h
+ Convert +
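All of these widening conversions read a __m128i source, using only as many of its low bytes as the result needs. Sketch assuming _mm256_cvtepu8_epi32:

#include <immintrin.h>

/* Load 8 unsigned bytes and widen them to 8 x i32. */
__m256i load_u8_as_i32(const unsigned char *p) {
    __m128i bytes = _mm_loadl_epi64((const __m128i *)p);  /* low 8 bytes only */
    return _mm256_cvtepu8_epi32(bytes);
}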
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
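In these gathers "scale" must be an immediate 1, 2, 4 or 8; the trailing "* 8" in the pseudocode only converts byte addresses into the bit offsets used by the MEM[] notation. Sketch assuming _mm256_i32gather_epi32:

#include <immintrin.h>

/* out[j] = table[idx[j]] for 8 indices at once. */
__m256i gather8(const int *table, const int *idx) {
    __m256i vindex = _mm256_loadu_si256((const __m256i *)idx);
    return _mm256_i32gather_epi32(table, vindex, 4);  /* scale 4 = sizeof(int) */
}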
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:64] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:64] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
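The eight entries above match the unmasked AVX2 gathers; the intrinsic names are not present in this data, but the descriptions correspond to the published _mm_/_mm256_ i32gather/i64gather family. A minimal C sketch under that assumption, using the 256-bit form with 32-bit indices and 64-bit elements:

#include <immintrin.h>

/* Assumed name: _mm256_i32gather_epi64. Gathers table[3], table[17],
 * table[42] and table[9]; scale = 8 because each element is 8 bytes. */
__m256i gather_example(const long long *table) {
    __m128i vindex = _mm_setr_epi32(3, 17, 42, 9);
    return _mm256_i32gather_epi64(table, vindex, 8);
}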
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF mask[i+63] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF mask[i+63] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF mask[i+31] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF mask[i+31] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF mask[i+31] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF mask[i+31] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF mask[i+63] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF mask[i+63] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF mask[i+63] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF mask[i+63] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF mask[i+31] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:64] := 0 +dst[MAX:64] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF mask[i+31] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF mask[i+31] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:64] := 0 +dst[MAX:64] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF mask[i+31] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF mask[i+63] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:128] := 0 +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF mask[i+63] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +mask[MAX:256] := 0 +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
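The masked gather forms add a "src" vector and a per-element mask whose sign bit chooses between the memory gather and the corresponding "src" element; note the pseudocode also clears the mask afterwards. A hedged sketch, assuming the last entry above is the published _mm256_mask_i64gather_epi64:

#include <immintrin.h>

/* Assumed name: _mm256_mask_i64gather_epi64. Lanes 0 and 2 are
 * gathered; lanes 1 and 3 keep src's value (here zero). */
__m256i masked_gather_example(const long long *table) {
    __m256i src    = _mm256_setzero_si256();
    __m256i vindex = _mm256_setr_epi64x(0, 2, 4, 6);
    __m256i mask   = _mm256_setr_epi64x(-1, 0, -1, 0);
    return _mm256_mask_i64gather_epi64(src, table, vindex, mask, 8);
}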
+ + + + + Load packed 32-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). + +FOR j := 0 to 3 + i := j*32 + IF mask[i+31] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). + +FOR j := 0 to 7 + i := j*32 + IF mask[i+31] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). + +FOR j := 0 to 1 + i := j*64 + IF mask[i+63] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). + +FOR j := 0 to 3 + i := j*64 + IF mask[i+63] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Load +
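These four entries describe the AVX2 masked loads (presumably _mm_maskload_epi32 and relatives): masked-off elements are zeroed and their addresses are not accessed. A sketch under that naming assumption:

#include <immintrin.h>

/* Assumed name: _mm_maskload_epi32. Reads buf[0] and buf[2];
 * lanes 1 and 3 become zero without touching memory. */
__m128i maskload_example(const int *buf) {
    __m128i mask = _mm_setr_epi32(-1, 0, -1, 0);
    return _mm_maskload_epi32(buf, mask);
}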
+ + + + Load 256 bits of integer data from memory into "dst" using a non-temporal memory hint. + 	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX2 + 
immintrin.h
+ Load +
+ + + + + + Store packed 32-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). + +FOR j := 0 to 3 + i := j*32 + IF mask[i+31] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX2 +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). + +FOR j := 0 to 7 + i := j*32 + IF mask[i+31] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX2 +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). + +FOR j := 0 to 1 + i := j*64 + IF mask[i+63] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX2 +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). + +FOR j := 0 to 3 + i := j*64 + IF mask[i+63] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX2 +
immintrin.h
+ Store +
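The matching masked stores write only the lanes whose mask sign bit is set and leave the other memory locations untouched. A sketch, assuming the 128-bit form is the published _mm_maskstore_epi32:

#include <immintrin.h>

/* Assumed name: _mm_maskstore_epi32. Writes out[0] and out[2];
 * out[1] and out[3] are left as they were. */
void maskstore_example(int *out, __m128i a) {
    __m128i mask = _mm_setr_epi32(-1, 0, -1, 0);
    _mm_maskstore_epi32(out, mask, a);
}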
+ + + + + Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] << (tmp*8) +dst[255:128] := a[255:128] << (tmp*8) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] << (tmp*8) +dst[255:128] := a[255:128] << (tmp*8) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
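Both entries above share one operation (the guide publishes it under two names, presumably _mm256_bslli_epi128 and _mm256_slli_si256). Note that the pseudocode shifts each 128-bit lane independently; bytes never cross the lane boundary, so this is not a full 256-bit byte shift. For example:

#include <immintrin.h>

/* Assumed name: _mm256_slli_si256. Shifts each 128-bit half left by
 * 4 bytes; the top 4 bytes of the low half are discarded, not carried
 * into the high half. */
__m256i byte_shift_example(__m256i a) {
    return _mm256_slli_si256(a, 4);
}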
+ + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
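The element shifts come in pairs: one reads the count from the low 64 bits of an XMM register ("count"), the other from an immediate ("imm8"), and any count wider than the element zeroes the result. A sketch assuming the usual names _mm256_slli_epi32 / _mm256_sll_epi32:

#include <immintrin.h>

/* Both shifts below produce the same result. */
__m256i shift_pair_example(__m256i x) {
    __m256i a = _mm256_slli_epi32(x, 5);   /* immediate count */
    __m128i n = _mm_cvtsi32_si128(5);      /* runtime count in xmm */
    __m256i b = _mm256_sll_epi32(x, n);
    return _mm256_xor_si256(a, b);         /* all zeros */
}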
+ + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
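Unlike the forms above, these variable shifts take an independent count per element; a count of 32 or more (64 for quadwords) zeroes that element rather than being taken modulo the width. A sketch assuming _mm256_sllv_epi32:

#include <immintrin.h>

/* Assumed name: _mm256_sllv_epi32. The last lane, shifted by 32,
 * becomes zero. */
__m256i varshift_example(__m256i x) {
    __m256i counts = _mm256_setr_epi32(0, 1, 2, 3, 4, 8, 16, 32);
    return _mm256_sllv_epi32(x, counts);
}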
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
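The arithmetic right shifts replicate the sign bit, so an oversized count yields all-zeros or all-ones depending on the sign instead of plain zero. A sketch assuming the usual names _mm256_srai_epi32 and _mm256_srav_epi32:

#include <immintrin.h>

__m256i arith_shift_example(void) {
    __m256i x = _mm256_set1_epi32(-64);
    __m256i a = _mm256_srai_epi32(x, 3);        /* every lane: -8 */
    __m256i counts = _mm256_set1_epi32(40);     /* count >= 32 */
    __m256i b = _mm256_srav_epi32(x, counts);   /* every lane: -1 */
    return _mm256_add_epi32(a, b);
}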
+ + + + + Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] >> (tmp*8) +dst[255:128] := a[255:128] >> (tmp*8) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] >> (tmp*8) +dst[255:128] := a[255:128] >> (tmp*8) +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX2 +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX2 +
immintrin.h
+ Shift +
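The logical right shifts mirror the left-shift family (immediate, register-count, and per-element forms) but always fill with zeros. A sketch assuming _mm256_srli_epi32 and _mm256_srlv_epi32:

#include <immintrin.h>

/* Extract the high halfword of each dword two equivalent ways. */
__m256i logical_shift_example(__m256i x) {
    __m256i a = _mm256_srli_epi32(x, 16);
    __m256i b = _mm256_srlv_epi32(x, _mm256_set1_epi32(16));
    return _mm256_xor_si256(a, b);   /* all zeros */
}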
+ + + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". + 	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +FOR i := 0 to 1 + 	tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] + 	tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] + 	tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] + 	tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] +ENDFOR +FOR j := 0 to 3 + 	i := j*64 + 	dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ 	           ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + 	dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ 	           ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + 	dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ 	           ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + 	dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ 	           ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
+ + + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + 	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +FOR i := 0 to 1 + 	tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] + 	tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] + 	tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] + 	tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] +ENDFOR +FOR j := 0 to 3 + 	i := j*64 + 	tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ 	           ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + 	tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ 	           ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + 	tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ 	           ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + 	tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ 	           ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +FOR j := 0 to 15 + 	i := j*16 + 	IF k[j] + 		dst[i+15:i] := tmp_dst[i+15:i] + 	ELSE + 		dst[i+15:i] := src[i+15:i] + 	FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
+ + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + 	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +FOR i := 0 to 1 + 	tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] + 	tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] + 	tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] + 	tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] +ENDFOR +FOR j := 0 to 3 + 	i := j*64 + 	tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ 	           ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + 	tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ 	           ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + 	tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ 	           ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + 	tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ 	           ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +FOR j := 0 to 15 + 	i := j*16 + 	IF k[j] + 		dst[i+15:i] := tmp_dst[i+15:i] + 	ELSE + 		dst[i+15:i] := 0 + 	FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
+ + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". + 	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +tmp.dword[0] := b.dword[ imm8[1:0] ] +tmp.dword[1] := b.dword[ imm8[3:2] ] +tmp.dword[2] := b.dword[ imm8[5:4] ] +tmp.dword[3] := b.dword[ imm8[7:6] ] +FOR j := 0 to 1 + 	i := j*64 + 	dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ 	           ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + 	dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ 	           ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + 	dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ 	           ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + 	dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ 	           ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
+ + + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + 	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +tmp.dword[0] := b.dword[ imm8[1:0] ] +tmp.dword[1] := b.dword[ imm8[3:2] ] +tmp.dword[2] := b.dword[ imm8[5:4] ] +tmp.dword[3] := b.dword[ imm8[7:6] ] +FOR j := 0 to 1 + 	i := j*64 + 	tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ 	           ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + 	tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ 	           ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + 	tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ 	           ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + 	tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ 	           ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +FOR j := 0 to 7 + 	i := j*16 + 	IF k[j] + 		dst[i+15:i] := tmp_dst[i+15:i] + 	ELSE + 		dst[i+15:i] := src[i+15:i] + 	FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
+ + + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + 	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. + +tmp.dword[0] := b.dword[ imm8[1:0] ] +tmp.dword[1] := b.dword[ imm8[3:2] ] +tmp.dword[2] := b.dword[ imm8[5:4] ] +tmp.dword[3] := b.dword[ imm8[7:6] ] +FOR j := 0 to 1 + 	i := j*64 + 	tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ 	           ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) + + 	tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ 	           ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) + + 	tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ 	           ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) + + 	tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ 	           ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) +ENDFOR +FOR j := 0 to 7 + 	i := j*16 + 	IF k[j] + 		dst[i+15:i] := tmp_dst[i+15:i] + 	ELSE + 		dst[i+15:i] := 0 + 	FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
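These six entries (plain, writemask, and zeromask forms at 256 and 128 bits) describe the AVX512BW+VL double-block SAD, presumably _mm256_dbsad_epu8 / _mm_dbsad_epu8 and their _mask_/_maskz_ variants; "imm8" picks which dword quadruplets of "b" each 64-bit lane is compared against. A hedged sketch:

#include <immintrin.h>

/* Assumed names: _mm256_dbsad_epu8 and _mm256_mask_dbsad_epu8.
 * imm8 = 0 selects dword 0 of b's 128-bit lane for every quadruplet. */
__m256i dbsad_example(__m256i a, __m256i b, __m256i src, __mmask16 k) {
    __m256i sums   = _mm256_dbsad_epu8(a, b, 0);
    __m256i masked = _mm256_mask_dbsad_epu8(src, k, a, b, 0);
    return _mm256_xor_si256(sums, masked);
}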
+ + + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + tmp_dst[i+127:i] := tmp[127:0] +ENDFOR +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + tmp_dst[i+127:i] := tmp[127:0] +ENDFOR +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
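These are masked byte alignments: the core is the familiar palignr concatenate-and-shift done per 128-bit block, with an AVX512 write or zero mask applied to the byte results; presumably _mm256_mask_alignr_epi8 and relatives. A sketch:

#include <immintrin.h>

/* Assumed name: _mm256_mask_alignr_epi8. Per 128-bit block, take
 * (a:b) shifted right by 5 bytes, then keep only the bytes whose bit
 * in k is set; the rest are copied from src. */
__m256i alignr_example(__m256i src, __mmask32 k, __m256i a, __m256i b) {
    return _mm256_mask_alignr_epi8(src, k, a, b, 5);
}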
+ + + + + + Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := b[i+7:i] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := b[i+7:i] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := b[i+15:i] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := b[i+15:i] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
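The mask blends select each element from "b" when the mask bit is set and from "a" otherwise, replacing AVX2's vector-mask blendv with a k register. A sketch assuming _mm256_mask_blend_epi8:

#include <immintrin.h>

/* Even byte lanes come from a, odd byte lanes from b. */
__m256i blend_example(__m256i a, __m256i b) {
    __mmask32 k = 0xAAAAAAAAu;
    return _mm256_mask_blend_epi8(k, a, b);
}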
+ + + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
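The masked broadcasts splat the lowest element of an XMM source and then apply the write or zero mask; note the source stays a 128-bit vector even for the 256-bit destinations. A sketch assuming _mm256_mask_broadcastb_epi8:

#include <immintrin.h>

/* Assumed name: _mm256_mask_broadcastb_epi8. Byte 0 of a is splatted
 * into the lanes selected by k; the others are copied from src. */
__m256i broadcast_example(__m256i src, __mmask32 k, __m128i a) {
    return _mm256_mask_broadcastb_epi8(src, k, a);
}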
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + off := 16*idx[i+3:i] + dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := idx[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + off := 16*idx[i+3:i] + dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + off := 16*idx[i+3:i] + dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + off := 16*idx[i+3:i] + dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + off := 16*idx[i+2:i] + dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := idx[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + off := 16*idx[i+2:i] + dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + off := 16*idx[i+2:i] + dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + off := 16*idx[i+2:i] + dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] +ENDFOR +dst[MAX:128] := 0 + + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
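The two-source permutes treat "a" and "b" as a single 32-entry word table (16 entries for the 128-bit forms): the low index bits select the element and the next bit selects the source; the mask/mask2/maskz entries differ only in where masked-off lanes come from ("a", "idx", or zero). A sketch assuming the plain 256-bit entry is _mm256_permutex2var_epi16:

#include <immintrin.h>

/* Indices 0..15 pick a word from a, 16..31 from b; this interleaves
 * the low eight words of each input. */
__m256i permute2_example(__m256i a, __m256i b) {
    __m256i idx = _mm256_setr_epi16(0, 16, 1, 17, 2, 18, 3, 19,
                                    4, 20, 5, 21, 6, 22, 7, 23);
    return _mm256_permutex2var_epi16(a, idx, b);
}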
+ + + + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + id := idx[i+3:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + id := idx[i+3:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + id := idx[i+3:i]*16 + dst[i+15:i] := a[id+15:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + id := idx[i+2:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + id := idx[i+2:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + id := idx[i+2:i]*16 + dst[i+15:i] := a[id+15:id] +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
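The single-source form permutes words across the whole register; unlike AVX2's byte shuffle it is not confined to 128-bit lanes. For example, a full reversal, assuming the plain 256-bit entry is _mm256_permutexvar_epi16:

#include <immintrin.h>

/* Reverse all sixteen 16-bit elements, crossing the lane boundary. */
__m256i reverse_words(__m256i a) {
    __m256i idx = _mm256_setr_epi16(15, 14, 13, 12, 11, 10, 9, 8,
                                    7, 6, 5, 4, 3, 2, 1, 0);
    return _mm256_permutexvar_epi16(idx, a);
}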
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a". + +FOR j := 0 to 31 + i := j*8 + IF a[i+7] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a". + +FOR j := 0 to 15 + i := j*8 + IF a[i+7] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := 0xFF + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := 0xFF + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := 0xFFFF + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := 0xFFFF + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a". + +FOR j := 0 to 15 + i := j*16 + IF a[i+15] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a". + +FOR j := 0 to 7 + i := j*16 + IF a[i+15] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
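These entries convert between vectors and mask registers in both directions: sign bits to mask bits, and mask bits back to all-ones or all-zeros elements; presumably _mm256_movepi8_mask, _mm256_movm_epi8, and the word-sized variants. A round-trip sketch:

#include <immintrin.h>

/* Collect each byte's sign bit into k, then expand k back into a
 * 0x00/0xFF byte vector. */
__m256i sign_mask_roundtrip(__m256i a) {
    __mmask32 k = _mm256_movepi8_mask(a);
    return _mm256_movm_epi8(k);
}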
+ + + + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[4:0] := b[i+3:i] + (j & 0x10) + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[4:0] := b[i+3:i] + (j & 0x10) + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[3:0] := b[i+3:i] + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[3:0] := b[i+3:i] + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Swizzle +
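These add AVX512 masking on top of the classic pshufb byte shuffle: control bit 7 still zeroes a lane, and the 256-bit form still shuffles within each 128-bit lane; presumably _mm256_mask_shuffle_epi8 / _mm256_maskz_shuffle_epi8. A sketch:

#include <immintrin.h>

/* Assumed name: _mm256_mask_shuffle_epi8. Shuffle a's bytes by b
 * within each 128-bit lane, then keep only the byte lanes selected
 * by k; the others are copied from src. */
__m256i masked_pshufb(__m256i src, __mmask32 k, __m256i a, __m256i b) {
    return _mm256_mask_shuffle_epi8(src, k, a, b);
}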
+ + + + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +tmp_dst[191:128] := a[191:128] +tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +FOR j := 0 to 15 + 	i := j*16 + 	IF k[j] + 		dst[i+15:i] := tmp_dst[i+15:i] + 	ELSE + 		dst[i+15:i] := src[i+15:i] + 	FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +tmp_dst[191:128] := a[191:128] +tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +FOR j := 0 to 15 + 	i := j*16 + 	IF k[j] + 		dst[i+15:i] := tmp_dst[i+15:i] + 	ELSE + 		dst[i+15:i] := 0 + 	FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +FOR j := 0 to 7 + 	i := j*16 + 	IF k[j] + 		dst[i+15:i] := tmp_dst[i+15:i] + 	ELSE + 		dst[i+15:i] := src[i+15:i] + 	FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +FOR j := 0 to 7 + 	i := j*16 + 	IF k[j] + 		dst[i+15:i] := tmp_dst[i+15:i] + 	ELSE + 		dst[i+15:i] := 0 + 	FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +tmp_dst[255:192] := a[255:192] +FOR j := 0 to 15 + 	i := j*16 + 	IF k[j] + 		dst[i+15:i] := tmp_dst[i+15:i] + 	ELSE + 		dst[i+15:i] := src[i+15:i] + 	FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL + 
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +tmp_dst[255:192] := a[255:192] +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
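
The XML name attributes did not survive in this hunk; judging by the descriptions and CPUID flags, the eight entries above are presumably the _mm_/_mm256_ mask/maskz variants of shufflehi_epi16 and shufflelo_epi16. A minimal sketch of the 256-bit writemask form under that naming assumption (compile with -mavx512bw -mavx512vl):

// Sketch only: the intrinsic name is inferred from the description above.
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    short v[16];
    for (int i = 0; i < 16; i++) v[i] = (short)i;
    __m256i a   = _mm256_loadu_si256((const __m256i *)v);
    __m256i src = _mm256_set1_epi16(-1);
    __mmask16 k = 0xF0F0;  // keep the shuffled high words; the rest come from src
    // imm8 = 0x1B selects words 3,2,1,0, i.e. reverses each high 64-bit half.
    __m256i r = _mm256_mask_shufflehi_epi16(src, k, a, 0x1B);
    _mm256_storeu_si256((__m256i *)v, r);
    for (int i = 0; i < 16; i++) printf("%d ", v[i]);  // -1 x4, 7 6 5 4, -1 x4, 15 14 13 12
    printf("\n");
    return 0;
}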
+ + + + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Miscellaneous +
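
Again with names inferred from the descriptions (presumably the mask/maskz unpacklo/unpackhi epi8/epi16 family), a small zeromask interleave sketch:

// Sketch only: interleave the low bytes of each 128-bit lane of a and b,
// zeroing the byte positions whose mask bit is clear.
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi8(1);
    __m256i b = _mm256_set1_epi8(2);
    __mmask32 k = 0xFFFF0000u;  // keep only the upper 128-bit lane's bytes
    __m256i r = _mm256_maskz_unpacklo_epi8(k, a, b);
    unsigned char out[32];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 32; i++) printf("%u ", out[i]);  // 16x 0, then 1 2 1 2 ...
    printf("\n");
    return 0;
}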
+ + + + + + Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Load +
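
These masked unaligned loads (presumably the _mm256_mask_loadu_epi8/epi16 and maskz family; names inferred) are the standard tool for loop remainders, because masked-out elements are not read from memory and therefore cannot fault:

// Sketch: load the n trailing bytes of a buffer (n <= 32) without reading
// past its end; masked-out byte positions are zeroed.
#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

static inline __m256i load_tail_u8(const uint8_t *p, size_t n) {
    __mmask32 k = (n >= 32) ? 0xFFFFFFFFu : ((1u << n) - 1u);
    return _mm256_maskz_loadu_epi8(k, p);
}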
+ Load 256-bits (composed of 16 packed 16-bit integers) from memory into "dst". "mem_addr" does not need to be aligned on any particular boundary.
+
+ dst[255:0] := MEM[mem_addr+255:mem_addr]
+ dst[MAX:256] := 0
+
+ CPUID Flags: AVX512BW, AVX512VL
+ Header: immintrin.h
+ Category: Load
+
+ Load 256-bits (composed of 32 packed 8-bit integers) from memory into "dst". "mem_addr" does not need to be aligned on any particular boundary.
+
+ dst[255:0] := MEM[mem_addr+255:mem_addr]
+ dst[MAX:256] := 0
+
+ CPUID Flags: AVX512BW, AVX512VL
+ Header: immintrin.h
+ Category: Load
+
+ Load 128-bits (composed of 8 packed 16-bit integers) from memory into "dst". "mem_addr" does not need to be aligned on any particular boundary.
+
+ dst[127:0] := MEM[mem_addr+127:mem_addr]
+ dst[MAX:128] := 0
+
+ CPUID Flags: AVX512BW, AVX512VL
+ Header: immintrin.h
+ Category: Load
+
+ Load 128-bits (composed of 16 packed 8-bit integers) from memory into "dst". "mem_addr" does not need to be aligned on any particular boundary.
+
+ dst[127:0] := MEM[mem_addr+127:mem_addr]
+ dst[MAX:128] := 0
+
+ CPUID Flags: AVX512BW, AVX512VL
+ Header: immintrin.h
+ Category: Load
+ + + + + + Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
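
The mask-move entries (presumably _mm256_mask_mov_epi16 and friends; names inferred) are per-element selects: dst[j] = k[j] ? a[j] : src[j]. That makes them a convenient mask-driven blend:

// Sketch: words where k is set come from a, the rest from b.
#include <immintrin.h>

static inline __m256i select_epi16(__mmask16 k, __m256i a, __m256i b) {
    return _mm256_mask_mov_epi16(b, k, a);
}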
+ + + + + + Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Store packed 16-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*16 + IF k[j] + MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i] + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 16-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*16 + IF k[j] + MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i] + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 8-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*8 + IF k[j] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 8-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*8 + IF k[j] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Store +
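
The masked stores above (presumably _mm256_mask_storeu_epi8/epi16 and the 128-bit forms; names inferred) mirror the masked loads: elements whose mask bit is clear are simply not written, which makes tail stores safe:

// Sketch: store the low n bytes of v (n <= 32); bytes past the tail are
// left untouched in memory.
#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

static inline void store_tail_u8(uint8_t *p, size_t n, __m256i v) {
    __mmask32 k = (n >= 32) ? 0xFFFFFFFFu : ((1u << n) - 1u);
    _mm256_mask_storeu_epi8(p, k, v);
}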
+ Store 256-bits (composed of 16 packed 16-bit integers) from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.
+
+ MEM[mem_addr+255:mem_addr] := a[255:0]
+
+ CPUID Flags: AVX512BW, AVX512VL
+ Header: immintrin.h
+ Category: Store
+
+ Store 256-bits (composed of 32 packed 8-bit integers) from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.
+
+ MEM[mem_addr+255:mem_addr] := a[255:0]
+
+ CPUID Flags: AVX512BW, AVX512VL
+ Header: immintrin.h
+ Category: Store
+
+ Store 128-bits (composed of 8 packed 16-bit integers) from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.
+
+ MEM[mem_addr+127:mem_addr] := a[127:0]
+
+ CPUID Flags: AVX512BW, AVX512VL
+ Header: immintrin.h
+ Category: Store
+
+ Store 128-bits (composed of 16 packed 8-bit integers) from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.
+
+ MEM[mem_addr+127:mem_addr] := a[127:0]
+
+ CPUID Flags: AVX512BW, AVX512VL
+ Header: immintrin.h
+ Category: Store
+ + + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
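
A merge-masked absolute value, assuming the entries above are the mask/maskz abs_epi8/epi16 family (names inferred):

// Sketch: |a| in the low 8 words, pass-through of src elsewhere.
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a   = _mm256_set1_epi16(-7);
    __m256i src = _mm256_set1_epi16(100);
    __m256i r = _mm256_mask_abs_epi16(src, 0x00FF, a);
    short out[16];
    _mm256_storeu_si256((__m256i *)out, r);
    printf("%d %d\n", out[0], out[15]);  // 7 100
    return 0;
}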
+ + + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
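
Masked addition (presumably _mm256_mask_add_epi8 and friends; names inferred) composes naturally into a conditional accumulate:

// Sketch: acc[j] += x[j] only where k[j] is set; other bytes pass through.
#include <immintrin.h>

static inline __m256i add_where(__m256i acc, __mmask32 k, __m256i x) {
    return _mm256_mask_add_epi8(acc, k, acc, x);
}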
+ + + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
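
The Saturate16 in these entries clamps to [-32768, 32767] instead of wrapping. A sketch contrasting the two behaviours (assuming the entry above is _mm256_maskz_adds_epi16; name inferred):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi16(30000);
    __m256i b = _mm256_set1_epi16(10000);
    __m256i wrap = _mm256_add_epi16(a, b);                 // wraps to -25536
    __m256i sat  = _mm256_maskz_adds_epi16(0xFFFF, a, b);  // clamps to 32767
    printf("%d %d\n", (short)_mm256_extract_epi16(wrap, 0),
                      (short)_mm256_extract_epi16(sat, 0));
    return 0;
}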
+ + + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
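
The (a + b + 1) >> 1 in the average entries is computed in widened precision, so it never wraps. A sketch (assuming _mm256_maskz_avg_epu8; name inferred):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi8((char)255);
    __m256i b = _mm256_set1_epi8((char)254);
    __m256i r = _mm256_maskz_avg_epu8(0xFFFFFFFFu, a, b);
    // (255 + 254 + 1) >> 1 = 255, no 8-bit overflow
    printf("%d\n", (unsigned char)_mm256_extract_epi8(r, 0));
    return 0;
}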
+ + + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
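
These entries (presumably the mask/maskz maddubs_epi16 family; names inferred) mix signedness: "a" is treated as unsigned bytes and "b" as signed bytes, and adjacent products are summed with 16-bit saturation. That is the classic first step of a u8 x s8 dot product:

// Sketch: one dot-product step over u8 pixels with s8 weights.
#include <immintrin.h>

static inline __m256i dot_step(__mmask16 k, __m256i pixels_u8, __m256i weights_s8) {
    return _mm256_maskz_maddubs_epi16(k, pixels_u8, weights_s8);
}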
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
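
The madd entries (presumably mask/maskz madd_epi16; names inferred) produce a[2i]*b[2i] + a[2i+1]*b[2i+1] per 32-bit lane, which is the core of an int16 dot product:

// Sketch: accumulate pair-wise 16x16->32 products into 32-bit sums
// (shown with the unmasked AVX2 form of the same operation).
#include <immintrin.h>

static inline __m256i dot_accumulate(__m256i acc, __m256i a, __m256i b) {
    return _mm256_add_epi32(acc, _mm256_madd_epi16(a, b));
}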
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
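The eight masked-minimum entries above lost their intrinsic names along with the surrounding markup; going by the descriptions, element widths, and CPUID flags they presumably document the _mm*_mask_min_* / _mm*_maskz_min_* family from the Intel guide. A minimal C sketch of the writemask-versus-zeromask distinction, with names inferred rather than taken from this hunk:

#include <immintrin.h>

/* Writemask keeps unselected lanes from src; zeromask clears them. */
__m256i min_lo16_demo(__m256i src, __m256i a, __m256i b)
{
    __mmask32 k = 0x0000FFFF;                             /* low 16 bytes only */
    __m256i merged = _mm256_mask_min_epi8(src, k, a, b);  /* upper 16 bytes copied from src */
    __m256i zeroed = _mm256_maskz_min_epi8(k, a, b);      /* upper 16 bytes forced to 0 */
    (void)zeroed;
    return merged;
}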
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
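The "bits [16:1]" slice in the pseudocode above is the usual Q15 rounding multiply. A worked value, assuming these entries correspond to _mm256_mask_mulhrs_epi16 (the names themselves were stripped):

#include <immintrin.h>

/* Q15 fixed point: 0x4000 represents 0.5.
 * 0x4000*0x4000 = 0x10000000; >>14 gives 0x4000; +1 gives 0x4001;
 * bits [16:1] are 0x2000, i.e. 0.5 * 0.5 rounds to 0.25. */
__m256i q15_square_half(void)
{
    __m256i half = _mm256_set1_epi16(0x4000);
    return _mm256_mask_mulhrs_epi16(half, (__mmask16)0xFFFF, half, half);
}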
+ + + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
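The unsigned and signed high-half multiplies differ exactly where the operands' top bit is set. A small contrast, presumably _mm_mask_mulhi_epu16 versus _mm_mask_mulhi_epi16:

#include <immintrin.h>

/* 0xFFFF is 65535 unsigned but -1 signed, so the high halves differ:
 * 65535*65535 >> 16 = 0xFFFE, while (-1)*(-1) >> 16 = 0x0000. */
__m128i mulhi_contrast(void)
{
    __m128i x = _mm_set1_epi16((short)0xFFFF);
    __m128i hi_u = _mm_mask_mulhi_epu16(x, (__mmask8)0xFF, x, x); /* all 0xFFFE */
    __m128i hi_s = _mm_mask_mulhi_epi16(x, (__mmask8)0xFF, x, x); /* all 0x0000 */
    return _mm_xor_si128(hi_u, hi_s);
}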
+ + + + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
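Unlike the high-half multiplies, the low 16 bits of the product are identical under signed and unsigned interpretation, which is why there is a single mullo entry per width; the mask only selects which lanes are written. A sketch, presumably _mm256_maskz_mullo_epi16:

#include <immintrin.h>

__m256i mullo_low_half(__m256i a, __m256i b)
{
    /* low 8 words get (a*b) mod 2^16, high 8 words are zeroed */
    return _mm256_maskz_mullo_epi16((__mmask16)0x00FF, a, b);
}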
+ + + + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] - b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] - b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] - b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] - b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
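These entries are the plain wrapping subtraction (no saturation); a common use of the writemask form is a conditional decrement. A sketch, presumably _mm256_mask_sub_epi8:

#include <immintrin.h>

/* Decrement only the bytes selected by k; others keep their old value.
 * Arithmetic wraps mod 256. */
__m256i dec_selected(__m256i v, __mmask32 k)
{
    return _mm256_mask_sub_epi8(v, k, v, _mm256_set1_epi8(1));
}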
+ + + + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
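The Saturate8/SaturateU8 distinction in the saturating-subtract entries is easiest to see on a value that underflows only in the unsigned view. A contrast, presumably _mm_maskz_subs_epi8 versus _mm_maskz_subs_epu8:

#include <immintrin.h>

__m128i subs_contrast(void)
{
    __m128i ten    = _mm_set1_epi8(10);
    __m128i twenty = _mm_set1_epi8(20);
    __m128i s = _mm_maskz_subs_epi8((__mmask16)0xFFFF, ten, twenty); /* signed: -10 */
    __m128i u = _mm_maskz_subs_epu8((__mmask16)0xFFFF, ten, twenty); /* unsigned: clamps to 0 */
    (void)u;
    return s;
}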
+ + + + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] - b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] - b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] - b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] - b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+ + Miscellaneous + + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := Saturate16(a[31:0]) +tmp_dst[31:16] := Saturate16(a[63:32]) +tmp_dst[47:32] := Saturate16(a[95:64]) +tmp_dst[63:48] := Saturate16(a[127:96]) +tmp_dst[79:64] := Saturate16(b[31:0]) +tmp_dst[95:80] := Saturate16(b[63:32]) +tmp_dst[111:96] := Saturate16(b[95:64]) +tmp_dst[127:112] := Saturate16(b[127:96]) +tmp_dst[143:128] := Saturate16(a[159:128]) +tmp_dst[159:144] := Saturate16(a[191:160]) +tmp_dst[175:160] := Saturate16(a[223:192]) +tmp_dst[191:176] := Saturate16(a[255:224]) +tmp_dst[207:192] := Saturate16(b[159:128]) +tmp_dst[223:208] := Saturate16(b[191:160]) +tmp_dst[239:224] := Saturate16(b[223:192]) +tmp_dst[255:240] := Saturate16(b[255:224]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := Saturate16(a[31:0]) +tmp_dst[31:16] := Saturate16(a[63:32]) +tmp_dst[47:32] := Saturate16(a[95:64]) +tmp_dst[63:48] := Saturate16(a[127:96]) +tmp_dst[79:64] := Saturate16(b[31:0]) +tmp_dst[95:80] := Saturate16(b[63:32]) +tmp_dst[111:96] := Saturate16(b[95:64]) +tmp_dst[127:112] := Saturate16(b[127:96]) +tmp_dst[143:128] := Saturate16(a[159:128]) +tmp_dst[159:144] := Saturate16(a[191:160]) +tmp_dst[175:160] := Saturate16(a[223:192]) +tmp_dst[191:176] := Saturate16(a[255:224]) +tmp_dst[207:192] := Saturate16(b[159:128]) +tmp_dst[223:208] := Saturate16(b[191:160]) +tmp_dst[239:224] := Saturate16(b[223:192]) +tmp_dst[255:240] := Saturate16(b[255:224]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := Saturate16(a[31:0]) +tmp_dst[31:16] := Saturate16(a[63:32]) +tmp_dst[47:32] := Saturate16(a[95:64]) +tmp_dst[63:48] := Saturate16(a[127:96]) +tmp_dst[79:64] := Saturate16(b[31:0]) +tmp_dst[95:80] := Saturate16(b[63:32]) +tmp_dst[111:96] := Saturate16(b[95:64]) +tmp_dst[127:112] := Saturate16(b[127:96]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := Saturate16(a[31:0]) +tmp_dst[31:16] := Saturate16(a[63:32]) +tmp_dst[47:32] := Saturate16(a[95:64]) +tmp_dst[63:48] := Saturate16(a[127:96]) +tmp_dst[79:64] := Saturate16(b[31:0]) +tmp_dst[95:80] := Saturate16(b[63:32]) +tmp_dst[111:96] := Saturate16(b[95:64]) +tmp_dst[127:112] := Saturate16(b[127:96]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
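Note from the tmp_dst assignments above that the 256-bit pack interleaves 128-bit lanes (a-low, b-low, a-high, b-high) rather than concatenating all of "a" before "b"; this regularly surprises first-time users. A sketch, presumably _mm256_mask_packs_epi32:

#include <immintrin.h>

__m256i pack_demo(__m256i src, __mmask16 k, __m256i a, __m256i b)
{
    /* word order of the result: a0..a3, b0..b3, a4..a7, b4..b7 */
    return _mm256_mask_packs_epi32(src, k, a, b);
}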
+ + Miscellaneous + + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[7:0] := Saturate8(a[15:0]) +tmp_dst[15:8] := Saturate8(a[31:16]) +tmp_dst[23:16] := Saturate8(a[47:32]) +tmp_dst[31:24] := Saturate8(a[63:48]) +tmp_dst[39:32] := Saturate8(a[79:64]) +tmp_dst[47:40] := Saturate8(a[95:80]) +tmp_dst[55:48] := Saturate8(a[111:96]) +tmp_dst[63:56] := Saturate8(a[127:112]) +tmp_dst[71:64] := Saturate8(b[15:0]) +tmp_dst[79:72] := Saturate8(b[31:16]) +tmp_dst[87:80] := Saturate8(b[47:32]) +tmp_dst[95:88] := Saturate8(b[63:48]) +tmp_dst[103:96] := Saturate8(b[79:64]) +tmp_dst[111:104] := Saturate8(b[95:80]) +tmp_dst[119:112] := Saturate8(b[111:96]) +tmp_dst[127:120] := Saturate8(b[127:112]) +tmp_dst[135:128] := Saturate8(a[143:128]) +tmp_dst[143:136] := Saturate8(a[159:144]) +tmp_dst[151:144] := Saturate8(a[175:160]) +tmp_dst[159:152] := Saturate8(a[191:176]) +tmp_dst[167:160] := Saturate8(a[207:192]) +tmp_dst[175:168] := Saturate8(a[223:208]) +tmp_dst[183:176] := Saturate8(a[239:224]) +tmp_dst[191:184] := Saturate8(a[255:240]) +tmp_dst[199:192] := Saturate8(b[143:128]) +tmp_dst[207:200] := Saturate8(b[159:144]) +tmp_dst[215:208] := Saturate8(b[175:160]) +tmp_dst[223:216] := Saturate8(b[191:176]) +tmp_dst[231:224] := Saturate8(b[207:192]) +tmp_dst[239:232] := Saturate8(b[223:208]) +tmp_dst[247:240] := Saturate8(b[239:224]) +tmp_dst[255:248] := Saturate8(b[255:240]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[7:0] := Saturate8(a[15:0]) +tmp_dst[15:8] := Saturate8(a[31:16]) +tmp_dst[23:16] := Saturate8(a[47:32]) +tmp_dst[31:24] := Saturate8(a[63:48]) +tmp_dst[39:32] := Saturate8(a[79:64]) +tmp_dst[47:40] := Saturate8(a[95:80]) +tmp_dst[55:48] := Saturate8(a[111:96]) +tmp_dst[63:56] := Saturate8(a[127:112]) +tmp_dst[71:64] := Saturate8(b[15:0]) +tmp_dst[79:72] := Saturate8(b[31:16]) +tmp_dst[87:80] := Saturate8(b[47:32]) +tmp_dst[95:88] := Saturate8(b[63:48]) +tmp_dst[103:96] := Saturate8(b[79:64]) +tmp_dst[111:104] := Saturate8(b[95:80]) +tmp_dst[119:112] := Saturate8(b[111:96]) +tmp_dst[127:120] := Saturate8(b[127:112]) +tmp_dst[135:128] := Saturate8(a[143:128]) +tmp_dst[143:136] := Saturate8(a[159:144]) +tmp_dst[151:144] := Saturate8(a[175:160]) +tmp_dst[159:152] := Saturate8(a[191:176]) +tmp_dst[167:160] := Saturate8(a[207:192]) +tmp_dst[175:168] := Saturate8(a[223:208]) +tmp_dst[183:176] := Saturate8(a[239:224]) +tmp_dst[191:184] := Saturate8(a[255:240]) +tmp_dst[199:192] := Saturate8(b[143:128]) +tmp_dst[207:200] := Saturate8(b[159:144]) +tmp_dst[215:208] := Saturate8(b[175:160]) +tmp_dst[223:216] := Saturate8(b[191:176]) +tmp_dst[231:224] := Saturate8(b[207:192]) +tmp_dst[239:232] := Saturate8(b[223:208]) +tmp_dst[247:240] := Saturate8(b[239:224]) +tmp_dst[255:248] := Saturate8(b[255:240]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[7:0] := Saturate8(a[15:0]) +tmp_dst[15:8] := Saturate8(a[31:16]) +tmp_dst[23:16] := Saturate8(a[47:32]) +tmp_dst[31:24] := Saturate8(a[63:48]) +tmp_dst[39:32] := Saturate8(a[79:64]) +tmp_dst[47:40] := Saturate8(a[95:80]) +tmp_dst[55:48] := Saturate8(a[111:96]) +tmp_dst[63:56] := Saturate8(a[127:112]) +tmp_dst[71:64] := Saturate8(b[15:0]) +tmp_dst[79:72] := Saturate8(b[31:16]) +tmp_dst[87:80] := Saturate8(b[47:32]) +tmp_dst[95:88] := Saturate8(b[63:48]) +tmp_dst[103:96] := Saturate8(b[79:64]) +tmp_dst[111:104] := Saturate8(b[95:80]) +tmp_dst[119:112] := Saturate8(b[111:96]) +tmp_dst[127:120] := Saturate8(b[127:112]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[7:0] := Saturate8(a[15:0]) +tmp_dst[15:8] := Saturate8(a[31:16]) +tmp_dst[23:16] := Saturate8(a[47:32]) +tmp_dst[31:24] := Saturate8(a[63:48]) +tmp_dst[39:32] := Saturate8(a[79:64]) +tmp_dst[47:40] := Saturate8(a[95:80]) +tmp_dst[55:48] := Saturate8(a[111:96]) +tmp_dst[63:56] := Saturate8(a[127:112]) +tmp_dst[71:64] := Saturate8(b[15:0]) +tmp_dst[79:72] := Saturate8(b[31:16]) +tmp_dst[87:80] := Saturate8(b[47:32]) +tmp_dst[95:88] := Saturate8(b[63:48]) +tmp_dst[103:96] := Saturate8(b[79:64]) +tmp_dst[111:104] := Saturate8(b[95:80]) +tmp_dst[119:112] := Saturate8(b[111:96]) +tmp_dst[127:120] := Saturate8(b[127:112]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := SaturateU16(a[31:0]) +tmp_dst[31:16] := SaturateU16(a[63:32]) +tmp_dst[47:32] := SaturateU16(a[95:64]) +tmp_dst[63:48] := SaturateU16(a[127:96]) +tmp_dst[79:64] := SaturateU16(b[31:0]) +tmp_dst[95:80] := SaturateU16(b[63:32]) +tmp_dst[111:96] := SaturateU16(b[95:64]) +tmp_dst[127:112] := SaturateU16(b[127:96]) +tmp_dst[143:128] := SaturateU16(a[159:128]) +tmp_dst[159:144] := SaturateU16(a[191:160]) +tmp_dst[175:160] := SaturateU16(a[223:192]) +tmp_dst[191:176] := SaturateU16(a[255:224]) +tmp_dst[207:192] := SaturateU16(b[159:128]) +tmp_dst[223:208] := SaturateU16(b[191:160]) +tmp_dst[239:224] := SaturateU16(b[223:192]) +tmp_dst[255:240] := SaturateU16(b[255:224]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := SaturateU16(a[31:0]) +tmp_dst[31:16] := SaturateU16(a[63:32]) +tmp_dst[47:32] := SaturateU16(a[95:64]) +tmp_dst[63:48] := SaturateU16(a[127:96]) +tmp_dst[79:64] := SaturateU16(b[31:0]) +tmp_dst[95:80] := SaturateU16(b[63:32]) +tmp_dst[111:96] := SaturateU16(b[95:64]) +tmp_dst[127:112] := SaturateU16(b[127:96]) +tmp_dst[143:128] := SaturateU16(a[159:128]) +tmp_dst[159:144] := SaturateU16(a[191:160]) +tmp_dst[175:160] := SaturateU16(a[223:192]) +tmp_dst[191:176] := SaturateU16(a[255:224]) +tmp_dst[207:192] := SaturateU16(b[159:128]) +tmp_dst[223:208] := SaturateU16(b[191:160]) +tmp_dst[239:224] := SaturateU16(b[223:192]) +tmp_dst[255:240] := SaturateU16(b[255:224]) +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := SaturateU16(a[31:0]) +tmp_dst[31:16] := SaturateU16(a[63:32]) +tmp_dst[47:32] := SaturateU16(a[95:64]) +tmp_dst[63:48] := SaturateU16(a[127:96]) +tmp_dst[79:64] := SaturateU16(b[31:0]) +tmp_dst[95:80] := SaturateU16(b[63:32]) +tmp_dst[111:96] := SaturateU16(b[95:64]) +tmp_dst[127:112] := SaturateU16(b[127:96]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := SaturateU16(a[31:0]) +tmp_dst[31:16] := SaturateU16(a[63:32]) +tmp_dst[47:32] := SaturateU16(a[95:64]) +tmp_dst[63:48] := SaturateU16(a[127:96]) +tmp_dst[79:64] := SaturateU16(b[31:0]) +tmp_dst[95:80] := SaturateU16(b[63:32]) +tmp_dst[111:96] := SaturateU16(b[95:64]) +tmp_dst[127:112] := SaturateU16(b[127:96]) +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[7:0] := SaturateU8(a[15:0]) +tmp_dst[15:8] := SaturateU8(a[31:16]) +tmp_dst[23:16] := SaturateU8(a[47:32]) +tmp_dst[31:24] := SaturateU8(a[63:48]) +tmp_dst[39:32] := SaturateU8(a[79:64]) +tmp_dst[47:40] := SaturateU8(a[95:80]) +tmp_dst[55:48] := SaturateU8(a[111:96]) +tmp_dst[63:56] := SaturateU8(a[127:112]) +tmp_dst[71:64] := SaturateU8(b[15:0]) +tmp_dst[79:72] := SaturateU8(b[31:16]) +tmp_dst[87:80] := SaturateU8(b[47:32]) +tmp_dst[95:88] := SaturateU8(b[63:48]) +tmp_dst[103:96] := SaturateU8(b[79:64]) +tmp_dst[111:104] := SaturateU8(b[95:80]) +tmp_dst[119:112] := SaturateU8(b[111:96]) +tmp_dst[127:120] := SaturateU8(b[127:112]) +tmp_dst[135:128] := SaturateU8(a[143:128]) +tmp_dst[143:136] := SaturateU8(a[159:144]) +tmp_dst[151:144] := SaturateU8(a[175:160]) +tmp_dst[159:152] := SaturateU8(a[191:176]) +tmp_dst[167:160] := SaturateU8(a[207:192]) +tmp_dst[175:168] := SaturateU8(a[223:208]) +tmp_dst[183:176] := SaturateU8(a[239:224]) +tmp_dst[191:184] := SaturateU8(a[255:240]) +tmp_dst[199:192] := SaturateU8(b[143:128]) +tmp_dst[207:200] := SaturateU8(b[159:144]) +tmp_dst[215:208] := SaturateU8(b[175:160]) +tmp_dst[223:216] := SaturateU8(b[191:176]) +tmp_dst[231:224] := SaturateU8(b[207:192]) +tmp_dst[239:232] := SaturateU8(b[223:208]) +tmp_dst[247:240] := SaturateU8(b[239:224]) +tmp_dst[255:248] := SaturateU8(b[255:240]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[7:0] := SaturateU8(a[15:0]) +tmp_dst[15:8] := SaturateU8(a[31:16]) +tmp_dst[23:16] := SaturateU8(a[47:32]) +tmp_dst[31:24] := SaturateU8(a[63:48]) +tmp_dst[39:32] := SaturateU8(a[79:64]) +tmp_dst[47:40] := SaturateU8(a[95:80]) +tmp_dst[55:48] := SaturateU8(a[111:96]) +tmp_dst[63:56] := SaturateU8(a[127:112]) +tmp_dst[71:64] := SaturateU8(b[15:0]) +tmp_dst[79:72] := SaturateU8(b[31:16]) +tmp_dst[87:80] := SaturateU8(b[47:32]) +tmp_dst[95:88] := SaturateU8(b[63:48]) +tmp_dst[103:96] := SaturateU8(b[79:64]) +tmp_dst[111:104] := SaturateU8(b[95:80]) +tmp_dst[119:112] := SaturateU8(b[111:96]) +tmp_dst[127:120] := SaturateU8(b[127:112]) +tmp_dst[135:128] := SaturateU8(a[143:128]) +tmp_dst[143:136] := SaturateU8(a[159:144]) +tmp_dst[151:144] := SaturateU8(a[175:160]) +tmp_dst[159:152] := SaturateU8(a[191:176]) +tmp_dst[167:160] := SaturateU8(a[207:192]) +tmp_dst[175:168] := SaturateU8(a[223:208]) +tmp_dst[183:176] := SaturateU8(a[239:224]) +tmp_dst[191:184] := SaturateU8(a[255:240]) +tmp_dst[199:192] := SaturateU8(b[143:128]) +tmp_dst[207:200] := SaturateU8(b[159:144]) +tmp_dst[215:208] := SaturateU8(b[175:160]) +tmp_dst[223:216] := SaturateU8(b[191:176]) +tmp_dst[231:224] := SaturateU8(b[207:192]) +tmp_dst[239:232] := SaturateU8(b[223:208]) +tmp_dst[247:240] := SaturateU8(b[239:224]) +tmp_dst[255:248] := SaturateU8(b[255:240]) +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[7:0] := SaturateU8(a[15:0]) +tmp_dst[15:8] := SaturateU8(a[31:16]) +tmp_dst[23:16] := SaturateU8(a[47:32]) +tmp_dst[31:24] := SaturateU8(a[63:48]) +tmp_dst[39:32] := SaturateU8(a[79:64]) +tmp_dst[47:40] := SaturateU8(a[95:80]) +tmp_dst[55:48] := SaturateU8(a[111:96]) +tmp_dst[63:56] := SaturateU8(a[127:112]) +tmp_dst[71:64] := SaturateU8(b[15:0]) +tmp_dst[79:72] := SaturateU8(b[31:16]) +tmp_dst[87:80] := SaturateU8(b[47:32]) +tmp_dst[95:88] := SaturateU8(b[63:48]) +tmp_dst[103:96] := SaturateU8(b[79:64]) +tmp_dst[111:104] := SaturateU8(b[95:80]) +tmp_dst[119:112] := SaturateU8(b[111:96]) +tmp_dst[127:120] := SaturateU8(b[127:112]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Miscellaneous + + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[7:0] := SaturateU8(a[15:0]) +tmp_dst[15:8] := SaturateU8(a[31:16]) +tmp_dst[23:16] := SaturateU8(a[47:32]) +tmp_dst[31:24] := SaturateU8(a[63:48]) +tmp_dst[39:32] := SaturateU8(a[79:64]) +tmp_dst[47:40] := SaturateU8(a[95:80]) +tmp_dst[55:48] := SaturateU8(a[111:96]) +tmp_dst[63:56] := SaturateU8(a[127:112]) +tmp_dst[71:64] := SaturateU8(b[15:0]) +tmp_dst[79:72] := SaturateU8(b[31:16]) +tmp_dst[87:80] := SaturateU8(b[47:32]) +tmp_dst[95:88] := SaturateU8(b[63:48]) +tmp_dst[103:96] := SaturateU8(b[79:64]) +tmp_dst[111:104] := SaturateU8(b[95:80]) +tmp_dst[119:112] := SaturateU8(b[111:96]) +tmp_dst[127:120] := SaturateU8(b[127:112]) +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
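The packus entries apply unsigned saturation to signed inputs, so values clamp in both directions. A sketch, presumably _mm_maskz_packus_epi16:

#include <immintrin.h>

__m128i packus_demo(void)
{
    __m128i w = _mm_setr_epi16(-5, 0, 127, 128, 255, 256, 300, -300);
    /* SaturateU8: -5 -> 0, 300 -> 255, values in [0,255] pass through */
    return _mm_maskz_packus_epi16((__mmask16)0xFFFF, w, w);
}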
+ + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + dst[l+7:l] := Saturate8(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + dst[l+7:l] := Saturate8(a[i+15:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
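This family narrows 16-bit lanes to 8-bit with signed saturation, in both a register form and a masked direct-to-memory form; the unsigned-saturating and truncating families below follow the same pattern. A sketch under the assumption that these are _mm256_cvtsepi16_epi8 and _mm256_mask_cvtsepi16_storeu_epi8:

#include <immintrin.h>

void narrow_words(unsigned char *out, __m256i words)
{
    /* register form: 16 words -> 16 signed-saturated bytes in an __m128i */
    __m128i bytes = _mm256_cvtsepi16_epi8(words);
    _mm_storeu_si128((__m128i *)out, bytes);

    /* memory form: write only the bytes whose mask bit is set */
    _mm256_mask_cvtsepi16_storeu_epi8(out + 16, (__mmask16)0x00FF, words);
}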
+ + + + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := SignExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := SignExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := SignExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := SignExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
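A sketch of the masked sign extension, presumably _mm256_mask_cvtepi8_epi16:

#include <immintrin.h>

__m256i widen_signed(__m256i src, __mmask16 k, __m128i bytes)
{
    /* SignExtend16: 0x80 (-128) becomes 0xFF80; unselected words come from src */
    return _mm256_mask_cvtepi8_epi16(src, k, bytes);
}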
+ + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + dst[l+7:l] := SaturateU8(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + dst[l+7:l] := SaturateU8(a[i+15:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + dst[l+7:l] := Truncate8(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + dst[l+7:l] := Truncate8(a[i+15:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+15:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i]) + FI +ENDFOR + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 16*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+15:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
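Truncate8 simply drops the high byte, in contrast to the saturating narrows earlier in this block. A side-by-side sketch, presumably _mm256_cvtepi16_epi8 versus _mm256_cvtsepi16_epi8:

#include <immintrin.h>

void narrow_contrast(__m256i words, __m128i *t, __m128i *s)
{
    *t = _mm256_cvtepi16_epi8(words);   /* Truncate8: 0x0123 -> 0x23 (wraps) */
    *s = _mm256_cvtsepi16_epi8(words);  /* Saturate8: 0x0123 -> 0x7F (clamps) */
}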
+ + + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := ZeroExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := ZeroExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := ZeroExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*8 + l := j*16 + IF k[j] + dst[l+15:l] := ZeroExtend16(a[i+7:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Convert +
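The zero-extending counterpart of the sign extension sketched above, presumably _mm256_maskz_cvtepu8_epi16:

#include <immintrin.h>

__m256i widen_unsigned(__mmask16 k, __m128i bytes)
{
    /* ZeroExtend16: 0x80 becomes 0x0080 here, versus 0xFF80 in the signed form */
    return _mm256_maskz_cvtepu8_epi16(k, bytes);
}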
+ + + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+FOR j := 0 to 31
+ i := j*8
+ IF k[j]
+ dst[i+7:i] := a[7:0]
+ ELSE
+ dst[i+7:i] := src[i+7:i]
+ FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512BW
+ AVX512VL
immintrin.h
+ Set +
+ + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 to 31
+ i := j*8
+ IF k[j]
+ dst[i+7:i] := a[7:0]
+ ELSE
+ dst[i+7:i] := 0
+ FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512BW
+ AVX512VL
immintrin.h
+ Set +
+ + + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+FOR j := 0 to 15
+ i := j*8
+ IF k[j]
+ dst[i+7:i] := a[7:0]
+ ELSE
+ dst[i+7:i] := src[i+7:i]
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512BW
+ AVX512VL
immintrin.h
+ Set +
+ + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 to 15
+ i := j*8
+ IF k[j]
+ dst[i+7:i] := a[7:0]
+ ELSE
+ dst[i+7:i] := 0
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512BW
+ AVX512VL
immintrin.h
+ Set +
+ + + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 to 15
+ i := j*16
+ IF k[j]
+ dst[i+15:i] := a[15:0]
+ ELSE
+ dst[i+15:i] := 0
+ FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512BW
+ AVX512VL
immintrin.h
+ Set +
+ + + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Set +
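As the pseudocode shows, only bits [7:0] (or [15:0]) of the source vector are consumed by the broadcasts. A sketch, presumably _mm256_mask_broadcastb_epi8:

#include <immintrin.h>

__m256i fill_selected(__m256i src, __mmask32 k)
{
    __m128i seed = _mm_cvtsi32_si128(0x2A);   /* only bits [7:0] are used */
    return _mm256_mask_broadcastb_epi8(src, k, seed);
}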
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
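+					The imm8-driven compare above and its zeromasked form map onto the generic compare intrinsics; assuming _mm256_cmp_epi8_mask / _mm256_mask_cmp_epi8_mask (AVX512BW+AVX512VL), a minimal C sketch:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi8(-1);
    __m256i b = _mm256_setzero_si256();
    // imm8 selects the predicate from the CASE table above; _MM_CMPINT_LT is 1.
    __mmask32 k = _mm256_cmp_epi8_mask(a, b, _MM_CMPINT_LT);
    printf("%08x\n", (unsigned)k);            // ffffffff: -1 < 0 in every lane
    // Zeromasked form: result bits whose k1 bit is 0 are forced to 0.
    __mmask32 km = _mm256_mask_cmp_epi8_mask(0x0000FFFF, a, b, _MM_CMPINT_LT);
    printf("%08x\n", (unsigned)km);           // 0000ffff
    return 0;
}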
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*8 + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
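+					The unsigned (epu8) entries above differ from the signed (epi8) family only in how lane bits are interpreted, which changes results for values with the top bit set. A C sketch, assuming the named forms _mm256_cmpgt_epi8_mask and _mm256_cmpgt_epu8_mask:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi8((char)0x80); // 0x80 = -128 signed, 128 unsigned
    __m256i b = _mm256_set1_epi8(1);
    __mmask32 gt_s = _mm256_cmpgt_epi8_mask(a, b); // signed: -128 > 1 is false
    __mmask32 gt_u = _mm256_cmpgt_epu8_mask(a, b); // unsigned: 128 > 1 is true
    printf("%08x %08x\n", (unsigned)gt_s, (unsigned)gt_u); // 00000000 ffffffff
    return 0;
}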
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*8 + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k1[j] + k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
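+					Note how the mask width tracks the element count in the pseudocode above: 32 byte lanes write a 32-bit mask (k[MAX:32] := 0), while 8 word lanes in a 128-bit vector write only 8 bits (k[MAX:8] := 0). A sketch for the 128-bit 16-bit case, assuming _mm_cmplt_epi16_mask:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    __m128i b = _mm_set1_epi16(4);
    __mmask8 k = _mm_cmplt_epi16_mask(a, b); // eight lanes -> 8-bit mask
    printf("%02x\n", (unsigned)k);           // 0f: lanes 0..3 are < 4
    return 0;
}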
+					
+					
+					
+					
+					
+					Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. 
+	
FOR j := 0 to 31
	i := j*8
	IF k1[j]
		k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:32] := 0
	
	
	AVX512BW
	AVX512VL
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 31 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+					
+					
+					
+					
+					
+					Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. 
+	
FOR j := 0 to 15
	i := j*8
	IF k1[j]
		k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:16] := 0
	
	
	AVX512BW
	AVX512VL
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 15 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
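+					The test entries above set a mask bit wherever two lanes share at least one set bit. A C sketch for the 256-bit byte form, assuming _mm256_test_epi8_mask:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi8(0x0F);
    __m256i b = _mm256_setr_epi8(
        0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01,
        0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01,
        0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01,
        0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01);
    // Bit j of k is set when (a[j] AND b[j]) != 0.
    __mmask32 k = _mm256_test_epi8_mask(a, b);
    printf("%08x\n", (unsigned)k);   // aaaaaaaa: only odd lanes overlap (0x0F & 0x01)
    return 0;
}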
+					
+					
+					
+					
+					
+					Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. 
+	
FOR j := 0 to 15
	i := j*16
	IF k1[j]
		k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:16] := 0
	
	
	AVX512BW
	AVX512VL
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 15 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+					
+					
+					
+					
+					
+					Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. 
+	
FOR j := 0 to 7
	i := j*16
	IF k1[j]
		k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:8] := 0
	
	
	AVX512BW
	AVX512VL
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 7 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+					
+					
+					
+					
+					
+					Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. 
+	
FOR j := 0 to 31
	i := j*8
	IF k1[j]
		k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:32] := 0
	
	
	AVX512BW
	AVX512VL
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 31 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+					
+					
+					
+					
+					
+					Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. 
+	
FOR j := 0 to 15
	i := j*8
	IF k1[j]
		k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:16] := 0
	
	
	AVX512BW
	AVX512VL
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 15 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+					
+					
+					
+					
+					
+					Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. 
+	
FOR j := 0 to 15
	i := j*16
	IF k1[j]
		k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:16] := 0
	
	
	AVX512BW
	AVX512VL
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 15 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
+					
+					
+					
+					
+					
+					Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. 
+	
FOR j := 0 to 7
	i := j*16
	IF k1[j]
		k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:8] := 0
	
	
	AVX512BW
	AVX512VL
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 7 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Compare +
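+					The NAND-test entries are the complement of the test entries: a mask bit is set when two lanes share no set bit. A sketch for the 256-bit word form, assuming _mm256_testn_epi16_mask:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi16(0x00FF);
    __m256i b = _mm256_setr_epi16(0, (short)0xFF00, 0, (short)0xFF00,
                                  0, (short)0xFF00, 0, (short)0xFF00,
                                  0, (short)0xFF00, 0, (short)0xFF00,
                                  0, (short)0xFF00, 0, (short)0xFF00);
    // Bit j of k is set when (a[j] AND b[j]) == 0.
    __mmask16 k = _mm256_testn_epi16_mask(a, b);
    printf("%04x\n", (unsigned)k);   // ffff: no lane shares a set bit with 0x00FF
    return 0;
}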
+ + + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
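+					The variable-shift entries above take a per-lane count and, per the pseudocode, produce 0 whenever a count is 16 or more. A C sketch for the unmasked 128-bit form, assuming _mm_sllv_epi16 (AVX512BW+AVX512VL):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a     = _mm_set1_epi16(1);
    __m128i count = _mm_setr_epi16(0, 1, 2, 3, 4, 15, 16, 200);
    __m128i dst = _mm_sllv_epi16(a, count); // per-lane left shift, zero-filling
    short out[8];
    _mm_storeu_si128((__m128i *)out, dst);
    for (int j = 0; j < 8; j++)
        printf("%d ", out[j]);              // 1 2 4 8 16 -32768 0 0
    printf("\n");
    return 0;
}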
+ + + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
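+					In contrast to the variable forms, these entries shift every lane by one shared count (a 64-bit vector element or an immediate). A sketch of the 128-bit writemasked immediate form, assuming _mm_mask_slli_epi16:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a   = _mm_set1_epi16(3);
    __m128i src = _mm_set1_epi16(-1);
    __mmask8 k  = 0x0F;                     // shift lanes 0..3; lanes 4..7 keep src
    __m128i dst = _mm_mask_slli_epi16(src, k, a, 2);
    short out[8];
    _mm_storeu_si128((__m128i *)out, dst);
    for (int j = 0; j < 8; j++)
        printf("%d ", out[j]);              // 12 12 12 12 -1 -1 -1 -1
    printf("\n");
    return 0;
}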
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
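+    The logical counterpart of the variable arithmetic shifts above; assuming the unmasked 128-bit form is _mm_srlv_epi16 (vpsrlvw, AVX512BW+VL), a minimal sketch:
+
+    #include <immintrin.h>
+
+    /* Per-lane logical right shift: shifts in zeros, and any count >= 16
+       clears the lane entirely, matching the ELSE branch above. */
+    __m128i srl_each_lane(__m128i a, __m128i count) {
+        return _mm_srlv_epi16(a, count);
+    }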
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512BW + AVX512VL +
immintrin.h
+ Shift +
+    Reduce the packed 16-bit integers in "a" by addition. Returns the sum of all elements in "a".
+
+DEFINE REDUCE_ADD(src, len) {
+    IF len == 2
+        RETURN src[15:0] + src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] + src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_ADD(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_ADD(a, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 16-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[15:0] + src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] + src[i+16*len+15:i+16*len] + ENDFOR + RETURN REDUCE_ADD(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0 + FI +ENDFOR +dst[15:0] := REDUCE_ADD(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by addition. Returns the sum of all elements in "a".
+
+DEFINE REDUCE_ADD(src, len) {
+    IF len == 2
+        RETURN src[15:0] + src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] + src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_ADD(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_ADD(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 16-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[15:0] + src[31:16] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := src[i+15:i] + src[i+16*len+15:i+16*len] + ENDFOR + RETURN REDUCE_ADD(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0 + FI +ENDFOR +dst[15:0] := REDUCE_ADD(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
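+    These reductions are emitted as instruction sequences rather than single instructions; assuming the masked 256-bit form above is exposed by recent compilers as _mm256_mask_reduce_add_epi16, a minimal sketch:
+
+    #include <immintrin.h>
+
+    /* Horizontal sum of the selected 16-bit lanes; inactive lanes contribute 0,
+       matching the tmp[i] := 0 step in the pseudocode above. */
+    short sum_active(__mmask16 k, __m256i a) {
+        return _mm256_mask_reduce_add_epi16(k, a);
+    }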
+    Reduce the packed 8-bit integers in "a" by addition. Returns the sum of all elements in "a".
+
+DEFINE REDUCE_ADD(src, len) {
+    IF len == 2
+        RETURN src[7:0] + src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_ADD(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_ADD(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 8-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[7:0] + src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len] + ENDFOR + RETURN REDUCE_ADD(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0 + FI +ENDFOR +dst[7:0] := REDUCE_ADD(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by addition. Returns the sum of all elements in "a".
+
+DEFINE REDUCE_ADD(src, len) {
+    IF len == 2
+        RETURN src[7:0] + src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_ADD(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_ADD(a, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 8-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[7:0] + src[15:8] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len] + ENDFOR + RETURN REDUCE_ADD(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0 + FI +ENDFOR +dst[7:0] := REDUCE_ADD(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by multiplication. Returns the product of all elements in "a".
+
+DEFINE REDUCE_MUL(src, len) {
+    IF len == 2
+        RETURN src[15:0] * src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] * src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_MUL(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_MUL(a, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a".
+
+DEFINE REDUCE_MUL(src, len) {
+    IF len == 2
+        RETURN src[15:0] * src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] * src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_MUL(src[16*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 7
+    i := j*16
+    IF k[j]
+        tmp[i+15:i] := a[i+15:i]
+    ELSE
+        tmp[i+15:i] := 1
+    FI
+ENDFOR
+dst[15:0] := REDUCE_MUL(tmp, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by multiplication. Returns the product of all elements in "a".
+
+DEFINE REDUCE_MUL(src, len) {
+    IF len == 2
+        RETURN src[15:0] * src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] * src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_MUL(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_MUL(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a".
+
+DEFINE REDUCE_MUL(src, len) {
+    IF len == 2
+        RETURN src[15:0] * src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] * src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_MUL(src[16*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 15
+    i := j*16
+    IF k[j]
+        tmp[i+15:i] := a[i+15:i]
+    ELSE
+        tmp[i+15:i] := 1
+    FI
+ENDFOR
+dst[15:0] := REDUCE_MUL(tmp, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by multiplication. Returns the product of all elements in "a".
+
+DEFINE REDUCE_MUL(src, len) {
+    IF len == 2
+        RETURN src[7:0] * src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] * src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_MUL(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_MUL(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a".
+
+DEFINE REDUCE_MUL(src, len) {
+    IF len == 2
+        RETURN src[7:0] * src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] * src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_MUL(src[8*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 15
+    i := j*8
+    IF k[j]
+        tmp[i+7:i] := a[i+7:i]
+    ELSE
+        tmp[i+7:i] := 1
+    FI
+ENDFOR
+dst[7:0] := REDUCE_MUL(tmp, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by multiplication. Returns the product of all elements in "a".
+
+DEFINE REDUCE_MUL(src, len) {
+    IF len == 2
+        RETURN src[7:0] * src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] * src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_MUL(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_MUL(a, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a".
+
+DEFINE REDUCE_MUL(src, len) {
+    IF len == 2
+        RETURN src[7:0] * src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] * src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_MUL(src[8*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 31
+    i := j*8
+    IF k[j]
+        tmp[i+7:i] := a[i+7:i]
+    ELSE
+        tmp[i+7:i] := 1
+    FI
+ENDFOR
+dst[7:0] := REDUCE_MUL(tmp, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
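+    Note the identity element: masked-off lanes are replaced by 1 so they cannot perturb the product. Assuming the masked 256-bit form is exposed as _mm256_mask_reduce_mul_epi16 in recent compilers, a minimal sketch:
+
+    #include <immintrin.h>
+
+    /* Horizontal product of the active 16-bit lanes; inactive lanes act as 1,
+       the multiplicative identity, exactly as in the tmp loop above. */
+    short product_active(__mmask16 k, __m256i a) {
+        return _mm256_mask_reduce_mul_epi16(k, a);
+    }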
+    Reduce the packed 16-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a".
+
+DEFINE REDUCE_OR(src, len) {
+    IF len == 2
+        RETURN src[15:0] OR src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] OR src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_OR(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_OR(a, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a".
+
+DEFINE REDUCE_OR(src, len) {
+    IF len == 2
+        RETURN src[15:0] OR src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] OR src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_OR(src[16*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 7
+    i := j*16
+    IF k[j]
+        tmp[i+15:i] := a[i+15:i]
+    ELSE
+        tmp[i+15:i] := 0
+    FI
+ENDFOR
+dst[15:0] := REDUCE_OR(tmp, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a".
+
+DEFINE REDUCE_OR(src, len) {
+    IF len == 2
+        RETURN src[15:0] OR src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] OR src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_OR(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_OR(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a".
+
+DEFINE REDUCE_OR(src, len) {
+    IF len == 2
+        RETURN src[15:0] OR src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] OR src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_OR(src[16*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 15
+    i := j*16
+    IF k[j]
+        tmp[i+15:i] := a[i+15:i]
+    ELSE
+        tmp[i+15:i] := 0
+    FI
+ENDFOR
+dst[15:0] := REDUCE_OR(tmp, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a".
+
+DEFINE REDUCE_OR(src, len) {
+    IF len == 2
+        RETURN src[7:0] OR src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] OR src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_OR(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_OR(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a".
+
+DEFINE REDUCE_OR(src, len) {
+    IF len == 2
+        RETURN src[7:0] OR src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] OR src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_OR(src[8*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 15
+    i := j*8
+    IF k[j]
+        tmp[i+7:i] := a[i+7:i]
+    ELSE
+        tmp[i+7:i] := 0
+    FI
+ENDFOR
+dst[7:0] := REDUCE_OR(tmp, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a".
+
+DEFINE REDUCE_OR(src, len) {
+    IF len == 2
+        RETURN src[7:0] OR src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] OR src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_OR(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_OR(a, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a".
+
+DEFINE REDUCE_OR(src, len) {
+    IF len == 2
+        RETURN src[7:0] OR src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] OR src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_OR(src[8*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 31
+    i := j*8
+    IF k[j]
+        tmp[i+7:i] := a[i+7:i]
+    ELSE
+        tmp[i+7:i] := 0
+    FI
+ENDFOR
+dst[7:0] := REDUCE_OR(tmp, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a".
+
+DEFINE REDUCE_AND(src, len) {
+    IF len == 2
+        RETURN src[15:0] AND src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] AND src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_AND(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_AND(a, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a".
+
+DEFINE REDUCE_AND(src, len) {
+    IF len == 2
+        RETURN src[15:0] AND src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] AND src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_AND(src[16*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 7
+    i := j*16
+    IF k[j]
+        tmp[i+15:i] := a[i+15:i]
+    ELSE
+        tmp[i+15:i] := 0xFFFF
+    FI
+ENDFOR
+dst[15:0] := REDUCE_AND(tmp, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a".
+
+DEFINE REDUCE_AND(src, len) {
+    IF len == 2
+        RETURN src[15:0] AND src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] AND src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_AND(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_AND(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 16-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a".
+
+DEFINE REDUCE_AND(src, len) {
+    IF len == 2
+        RETURN src[15:0] AND src[31:16]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := src[i+15:i] AND src[i+16*len+15:i+16*len]
+    ENDFOR
+    RETURN REDUCE_AND(src[16*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 15
+    i := j*16
+    IF k[j]
+        tmp[i+15:i] := a[i+15:i]
+    ELSE
+        tmp[i+15:i] := 0xFFFF
+    FI
+ENDFOR
+dst[15:0] := REDUCE_AND(tmp, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a".
+
+DEFINE REDUCE_AND(src, len) {
+    IF len == 2
+        RETURN src[7:0] AND src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] AND src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_AND(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_AND(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a".
+
+DEFINE REDUCE_AND(src, len) {
+    IF len == 2
+        RETURN src[7:0] AND src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] AND src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_AND(src[8*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 15
+    i := j*8
+    IF k[j]
+        tmp[i+7:i] := a[i+7:i]
+    ELSE
+        tmp[i+7:i] := 0xFF
+    FI
+ENDFOR
+dst[7:0] := REDUCE_AND(tmp, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a".
+
+DEFINE REDUCE_AND(src, len) {
+    IF len == 2
+        RETURN src[7:0] AND src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] AND src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_AND(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_AND(a, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
+    Reduce the packed 8-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a".
+
+DEFINE REDUCE_AND(src, len) {
+    IF len == 2
+        RETURN src[7:0] AND src[15:8]
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := src[i+7:i] AND src[i+8*len+7:i+8*len]
+    ENDFOR
+    RETURN REDUCE_AND(src[8*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 31
+    i := j*8
+    IF k[j]
+        tmp[i+7:i] := a[i+7:i]
+    ELSE
+        tmp[i+7:i] := 0xFF
+    FI
+ENDFOR
+dst[7:0] := REDUCE_AND(tmp, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Arithmetic +
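+    The two bitwise reductions pad inactive lanes with their respective identities (0 for OR, all-ones for AND), so masked-off elements never affect the result. A minimal sketch, assuming the 128-bit masked forms are exposed as _mm_mask_reduce_or_epi16 and _mm_mask_reduce_and_epi16:
+
+    #include <immintrin.h>
+
+    /* Bitwise OR across the active 16-bit lanes (inactive lanes act as 0). */
+    short any_bits(__mmask8 k, __m128i a) { return _mm_mask_reduce_or_epi16(k, a); }
+
+    /* Bitwise AND across the active 16-bit lanes (inactive lanes act as 0xFFFF). */
+    short all_bits(__mmask8 k, __m128i a) { return _mm_mask_reduce_and_epi16(k, a); }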
+    Reduce the packed signed 16-bit integers in "a" by maximum. Returns the maximum of all elements in "a".
+
+DEFINE REDUCE_MAX(src, len) {
+    IF len == 2
+        RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
+    ENDFOR
+    RETURN REDUCE_MAX(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_MAX(a, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := Int16(-0x8000) + FI +ENDFOR +dst[15:0] := REDUCE_MAX(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed signed 16-bit integers in "a" by maximum. Returns the maximum of all elements in "a".
+
+DEFINE REDUCE_MAX(src, len) {
+    IF len == 2
+        RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
+    ENDFOR
+    RETURN REDUCE_MAX(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_MAX(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := Int16(-0x8000) + FI +ENDFOR +dst[15:0] := REDUCE_MAX(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed signed 8-bit integers in "a" by maximum. Returns the maximum of all elements in "a".
+
+DEFINE REDUCE_MAX(src, len) {
+    IF len == 2
+        RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
+    ENDFOR
+    RETURN REDUCE_MAX(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_MAX(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := Int8(-0x80) + FI +ENDFOR +dst[7:0] := REDUCE_MAX(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed signed 8-bit integers in "a" by maximum. Returns the maximum of all elements in "a".
+
+DEFINE REDUCE_MAX(src, len) {
+    IF len == 2
+        RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
+    ENDFOR
+    RETURN REDUCE_MAX(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_MAX(a, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := Int8(-0x80) + FI +ENDFOR +dst[7:0] := REDUCE_MAX(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed unsigned 16-bit integers in "a" by maximum. Returns the maximum of all elements in "a".
+
+DEFINE REDUCE_MAX(src, len) {
+    IF len == 2
+        RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
+    ENDFOR
+    RETURN REDUCE_MAX(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_MAX(a, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0 + FI +ENDFOR +dst[15:0] := REDUCE_MAX(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed unsigned 16-bit integers in "a" by maximum. Returns the maximum of all elements in "a".
+
+DEFINE REDUCE_MAX(src, len) {
+    IF len == 2
+        RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
+    ENDFOR
+    RETURN REDUCE_MAX(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_MAX(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MAX(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0 + FI +ENDFOR +dst[15:0] := REDUCE_MAX(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed unsigned 8-bit integers in "a" by maximum. Returns the maximum of all elements in "a".
+
+DEFINE REDUCE_MAX(src, len) {
+    IF len == 2
+        RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
+    ENDFOR
+    RETURN REDUCE_MAX(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_MAX(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0 + FI +ENDFOR +dst[7:0] := REDUCE_MAX(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed unsigned 8-bit integers in "a" by maximum. Returns the maximum of all elements in "a".
+
+DEFINE REDUCE_MAX(src, len) {
+    IF len == 2
+        RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
+    ENDFOR
+    RETURN REDUCE_MAX(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_MAX(a, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MAX(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0 + FI +ENDFOR +dst[7:0] := REDUCE_MAX(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed signed 16-bit integers in "a" by minimum. Returns the minimum of all elements in "a".
+
+DEFINE REDUCE_MIN(src, len) {
+    IF len == 2
+        RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
+    ENDFOR
+    RETURN REDUCE_MIN(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_MIN(a, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := Int16(0x7FFF) + FI +ENDFOR +dst[15:0] := REDUCE_MIN(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed signed 16-bit integers in "a" by minimum. Returns the minimum of all elements in "a".
+
+DEFINE REDUCE_MIN(src, len) {
+    IF len == 2
+        RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
+    ENDFOR
+    RETURN REDUCE_MIN(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_MIN(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := Int16(0x7FFF) + FI +ENDFOR +dst[15:0] := REDUCE_MIN(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed signed 8-bit integers in "a" by minimum. Returns the minimum of all elements in "a".
+
+DEFINE REDUCE_MIN(src, len) {
+    IF len == 2
+        RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
+    ENDFOR
+    RETURN REDUCE_MIN(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_MIN(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := Int8(0x7F) + FI +ENDFOR +dst[7:0] := REDUCE_MIN(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed signed 8-bit integers in "a" by minimum. Returns the minimum of all elements in "a".
+
+DEFINE REDUCE_MIN(src, len) {
+    IF len == 2
+        RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
+    ENDFOR
+    RETURN REDUCE_MIN(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_MIN(a, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 31 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := Int8(0x7F) + FI +ENDFOR +dst[7:0] := REDUCE_MIN(tmp, 32) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed unsigned 16-bit integers in "a" by minimum. Returns the minimum of all elements in "a".
+
+DEFINE REDUCE_MIN(src, len) {
+    IF len == 2
+        RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
+    ENDFOR
+    RETURN REDUCE_MIN(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_MIN(a, 8)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0xFFFF + FI +ENDFOR +dst[15:0] := REDUCE_MIN(tmp, 8) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed unsigned 16-bit integers in "a" by minimum. Returns the minimum of all elements in "a".
+
+DEFINE REDUCE_MIN(src, len) {
+    IF len == 2
+        RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*16
+        src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
+    ENDFOR
+    RETURN REDUCE_MIN(src[16*len-1:0], len)
+}
+dst[15:0] := REDUCE_MIN(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*16 + src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) + ENDFOR + RETURN REDUCE_MIN(src[16*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[i+15:i] := a[i+15:i] + ELSE + tmp[i+15:i] := 0xFFFF + FI +ENDFOR +dst[15:0] := REDUCE_MIN(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed unsigned 8-bit integers in "a" by minimum. Returns the minimum of all elements in "a".
+
+DEFINE REDUCE_MIN(src, len) {
+    IF len == 2
+        RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
+    ENDFOR
+    RETURN REDUCE_MIN(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_MIN(a, 16)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*8 + src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) + ENDFOR + RETURN REDUCE_MIN(src[8*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*8 + IF k[j] + tmp[i+7:i] := a[i+7:i] + ELSE + tmp[i+7:i] := 0xFF + FI +ENDFOR +dst[7:0] := REDUCE_MIN(tmp, 16) + + AVX512BW + AVX512VL +
immintrin.h
+ Special Math Functions +
+    Reduce the packed unsigned 8-bit integers in "a" by minimum. Returns the minimum of all elements in "a".
+
+DEFINE REDUCE_MIN(src, len) {
+    IF len == 2
+        RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
+    ENDFOR
+    RETURN REDUCE_MIN(src[8*len-1:0], len)
+}
+dst[7:0] := REDUCE_MIN(a, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
+    Reduce the packed unsigned 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a".
+
+DEFINE REDUCE_MIN(src, len) {
+    IF len == 2
+        RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
+    FI
+    len := len / 2
+    FOR j:= 0 to (len-1)
+        i := j*8
+        src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
+    ENDFOR
+    RETURN REDUCE_MIN(src[8*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 31
+    i := j*8
+    IF k[j]
+        tmp[i+7:i] := a[i+7:i]
+    ELSE
+        tmp[i+7:i] := 0xFF
+    FI
+ENDFOR
+dst[7:0] := REDUCE_MIN(tmp, 32)
+
+    AVX512BW
+    AVX512VL
immintrin.h
+ Special Math Functions +
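+    The min/max reductions pad inactive lanes with the identity for the operation (INT16_MAX for signed min, 0 for unsigned max, and so on). A minimal sketch, assuming the masked 256-bit forms are exposed as _mm256_mask_reduce_min_epi16 and _mm256_mask_reduce_max_epu8 with these return types:
+
+    #include <immintrin.h>
+
+    /* Smallest active signed word; inactive lanes act as 0x7FFF. */
+    short smallest(__mmask16 k, __m256i a) { return _mm256_mask_reduce_min_epi16(k, a); }
+
+    /* Largest active unsigned byte; inactive lanes act as 0. */
+    unsigned char largest(__mmask32 k, __m256i a) { return _mm256_mask_reduce_max_epu8(k, a); }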
+ + + + + + Unpack and interleave 32 bits from masks "a" and "b", and store the 64-bit result in "dst". + +dst[31:0] := b[31:0] +dst[63:32] := a[31:0] +dst[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 16 bits from masks "a" and "b", and store the 32-bit result in "dst". + +dst[15:0] := b[15:0] +dst[31:16] := a[15:0] +dst[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
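+    These mask-unpack operations build the wide masks needed for 64- and 32-lane byte/word vectors; assuming the 64-bit form above is _mm512_kunpackd (AVX512BW), a minimal sketch:
+
+    #include <immintrin.h>
+
+    /* Build a 64-bit byte mask from two 32-bit halves: the low 32 bits of the
+       result come from `lo`, the high 32 bits from the low half of `hi`. */
+    __mmask64 join_masks(__mmask64 hi, __mmask64 lo) {
+        return _mm512_kunpackd(hi, lo);
+    }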
+    Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst".
+    Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
+
+FOR i := 0 to 3
+    tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ]
+    tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ]
+    tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ]
+    tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ]
+ENDFOR
+FOR j := 0 to 7
+    i := j*64
+    dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+                   ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+
+    dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+                      ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+
+    dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+                      ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+
+    dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+                      ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+dst[MAX:512] := 0
+
+    AVX512BW
immintrin.h
+ Miscellaneous +
+    Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+    Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
+
+FOR i := 0 to 3
+    tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ]
+    tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ]
+    tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ]
+    tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ]
+ENDFOR
+FOR j := 0 to 7
+    i := j*64
+    tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+                       ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+
+    tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+                          ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+
+    tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+                          ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+
+    tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+                          ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+FOR j := 0 to 31
+    i := j*16
+    IF k[j]
+        dst[i+15:i] := tmp_dst[i+15:i]
+    ELSE
+        dst[i+15:i] := src[i+15:i]
+    FI
+ENDFOR
+dst[MAX:512] := 0
+
+    AVX512BW
immintrin.h
+ Miscellaneous +
+    Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+    Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
+
+FOR i := 0 to 3
+    tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ]
+    tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ]
+    tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ]
+    tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ]
+ENDFOR
+FOR j := 0 to 7
+    i := j*64
+    tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+                       ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+
+    tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+                          ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+
+    tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+                          ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+
+    tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+                          ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+FOR j := 0 to 31
+    i := j*16
+    IF k[j]
+        dst[i+15:i] := tmp_dst[i+15:i]
+    ELSE
+        dst[i+15:i] := 0
+    FI
+ENDFOR
+dst[MAX:512] := 0
+
+    AVX512BW
immintrin.h
+ Miscellaneous +
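+    This is the double-block SAD used in motion-estimation kernels; assuming the unmasked form is _mm512_dbsad_epu8 (vdbpsadbw, AVX512BW), a minimal sketch. The selector value is arbitrary and only for illustration; it must be a compile-time constant:
+
+    #include <immintrin.h>
+
+    /* imm8 = 0x94 picks dwords 0,1,1,2 of each 128-bit lane of b to line up
+       against the quadruplets of a. */
+    __m512i four_sads(__m512i a, __m512i b) {
+        return _mm512_dbsad_epu8(a, b, 0x94);
+    }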
+ + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". + +FOR j := 0 to 3 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + dst[i+127:i] := tmp[127:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + tmp_dst[i+127:i] := tmp[127:0] +ENDFOR +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*128 + tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) + tmp_dst[i+127:i] := tmp[127:0] +ENDFOR +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
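+    Assuming these per-lane byte-alignment entries correspond to _mm512_alignr_epi8 (valignr semantics within each 128-bit lane, AVX512BW), a minimal sketch of the unmasked form:
+
+    #include <immintrin.h>
+
+    /* Each 128-bit result lane takes bytes 3..18 of the 32-byte concatenation
+       (a_lane : b_lane); imm8 must be a compile-time constant. */
+    __m512i shift_window(__m512i a, __m512i b) {
+        return _mm512_alignr_epi8(a, b, 3);
+    }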
+ + + + + + Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := b[i+7:i] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := b[i+15:i] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
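+    Assuming the byte blend above is _mm512_mask_blend_epi8 (AVX512BW), a minimal sketch:
+
+    #include <immintrin.h>
+
+    /* Mask-driven select: bit j of k picks b's byte (1) or a's byte (0). */
+    __m512i select_bytes(__mmask64 k, __m512i a, __m512i b) {
+        return _mm512_mask_blend_epi8(k, a, b);
+    }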
+ + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[7:0] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := a[15:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[15:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
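+    Assuming the unmasked byte broadcast above is _mm512_broadcastb_epi8 (vpbroadcastb, AVX512BW), a minimal sketch:
+
+    #include <immintrin.h>
+
+    /* Splat one byte across all 64 lanes of a ZMM register. */
+    __m512i splat_byte(char v) {
+        return _mm512_broadcastb_epi8(_mm_set1_epi8(v));
+    }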
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + off := 16*idx[i+4:i] + dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := idx[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + off := 16*idx[i+4:i] + dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + off := 16*idx[i+4:i] + dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + off := 16*idx[i+4:i] + dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + id := idx[i+4:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + id := idx[i+4:i]*16 + IF k[j] + dst[i+15:i] := a[id+15:id] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + id := idx[i+4:i]*16 + dst[i+15:i] := a[id+15:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a". + +FOR j := 0 to 63 + i := j*8 + IF a[i+7] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := 0xFF + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
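Editorial note: the two entries above are inverses, moving between byte MSBs and a 64-bit mask register (conventionally _mm512_movepi8_mask and _mm512_movm_epi8). A round-trip sketch, illustrative only:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i v = _mm512_set1_epi8(-128);     /* MSB set in every byte */
    __mmask64 k = _mm512_movepi8_mask(v);   /* -> all 64 mask bits set */
    __m512i w = _mm512_movm_epi8(k);        /* -> 0xFF in every selected byte */
    unsigned char out[64];
    _mm512_storeu_si512(out, w);
    printf("mask=%llx byte0=%u\n", (unsigned long long)k, out[0]);  /* ffffffffffffffff 255 */
    return 0;
}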
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := 0xFFFF + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a". + +FOR j := 0 to 31 + i := j*16 + IF a[i+15] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst". + +FOR j := 0 to 63 + i := j*8 + tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) +ENDFOR +FOR j := 0 to 7 + i := j*64 + dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \ + tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56] + dst[i+63:i+16] := 0 +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
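Editorial note: a sketch of the sum-of-absolute-differences above (conventionally _mm512_sad_epu8; each 64-bit result element holds one 8-byte SAD in its low 16 bits). Illustrative only:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned char a[64], b[64];
    for (int j = 0; j < 64; j++) { a[j] = (unsigned char)j; b[j] = 0; }
    __m512i s = _mm512_sad_epu8(_mm512_loadu_si512(a), _mm512_loadu_si512(b));
    long long out[8];
    _mm512_storeu_si512(out, s);
    printf("%lld\n", out[0]);   /* |0-0|+|1-0|+...+|7-0| = 28 */
    return 0;
}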
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 8-bit integers in "a" within 128-bit lanes using the control in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[5:0] := b[i+3:i] + (j & 0x30) + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[5:0] := b[i+3:i] + (j & 0x30) + dst[i+7:i] := a[index*8+7:index*8] + FI + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Swizzle +
+ + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[5:0] := b[i+3:i] + (j & 0x30) + dst[i+7:i] := a[index*8+7:index*8] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
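Editorial note: per the index formula above (b[i+3:i] + (j & 0x30)), the byte shuffle never crosses a 128-bit lane. A sketch (conventionally _mm512_shuffle_epi8), illustrative only:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned char a[64], ctl[64], out[64];
    for (int j = 0; j < 64; j++) a[j] = (unsigned char)j;
    for (int j = 0; j < 64; j++) ctl[j] = (unsigned char)(15 - (j & 15));  /* reverse within each lane */
    __m512i r = _mm512_shuffle_epi8(_mm512_loadu_si512(a), _mm512_loadu_si512(ctl));
    _mm512_storeu_si512(out, r);
    printf("%u %u\n", out[0], out[16]);   /* 15 (lane 0 reversed), 31 (lane 1 reversed) */
    return 0;
}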
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +tmp_dst[191:128] := a[191:128] +tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +tmp_dst[319:256] := a[319:256] +tmp_dst[335:320] := (a >> (imm8[1:0] * 16))[335:320] +tmp_dst[351:336] := (a >> (imm8[3:2] * 16))[335:320] +tmp_dst[367:352] := (a >> (imm8[5:4] * 16))[335:320] +tmp_dst[383:368] := (a >> (imm8[7:6] * 16))[335:320] +tmp_dst[447:384] := a[447:384] +tmp_dst[463:448] := (a >> (imm8[1:0] * 16))[463:448] +tmp_dst[479:464] := (a >> (imm8[3:2] * 16))[463:448] +tmp_dst[495:480] := (a >> (imm8[5:4] * 16))[463:448] +tmp_dst[511:496] := (a >> (imm8[7:6] * 16))[463:448] +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := a[63:0] +tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +tmp_dst[191:128] := a[191:128] +tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +tmp_dst[319:256] := a[319:256] +tmp_dst[335:320] := (a >> (imm8[1:0] * 16))[335:320] +tmp_dst[351:336] := (a >> (imm8[3:2] * 16))[335:320] +tmp_dst[367:352] := (a >> (imm8[5:4] * 16))[335:320] +tmp_dst[383:368] := (a >> (imm8[7:6] * 16))[335:320] +tmp_dst[447:384] := a[447:384] +tmp_dst[463:448] := (a >> (imm8[1:0] * 16))[463:448] +tmp_dst[479:464] := (a >> (imm8[3:2] * 16))[463:448] +tmp_dst[495:480] := (a >> (imm8[5:4] * 16))[463:448] +tmp_dst[511:496] := (a >> (imm8[7:6] * 16))[463:448] +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst". + +dst[63:0] := a[63:0] +dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] +dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] +dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] +dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] +dst[191:128] := a[191:128] +dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] +dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] +dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] +dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] +dst[319:256] := a[319:256] +dst[335:320] := (a >> (imm8[1:0] * 16))[335:320] +dst[351:336] := (a >> (imm8[3:2] * 16))[335:320] +dst[367:352] := (a >> (imm8[5:4] * 16))[335:320] +dst[383:368] := (a >> (imm8[7:6] * 16))[335:320] +dst[447:384] := a[447:384] +dst[463:448] := (a >> (imm8[1:0] * 16))[463:448] +dst[479:464] := (a >> (imm8[3:2] * 16))[463:448] +dst[495:480] := (a >> (imm8[5:4] * 16))[463:448] +dst[511:496] := (a >> (imm8[7:6] * 16))[463:448] +dst[MAX:512] := 0 + + + AVX512BW
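Editorial note: a sketch of the high-word shuffle above (conventionally _mm512_shufflehi_epi16; imm8 must be a compile-time constant, and the _MM_SHUFFLE helper macro packs the four 2-bit selectors). Illustrative only:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    short a[32], out[32];
    for (int j = 0; j < 32; j++) a[j] = (short)j;
    /* _MM_SHUFFLE(0,1,2,3) reverses the four high words of each 128-bit lane */
    __m512i r = _mm512_shufflehi_epi16(_mm512_loadu_si512(a), _MM_SHUFFLE(0, 1, 2, 3));
    _mm512_storeu_si512(out, r);
    printf("%d %d %d\n", out[3], out[4], out[7]);  /* 3 (low half copied), 7, 4 */
    return 0;
}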
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +tmp_dst[255:192] := a[255:192] +tmp_dst[271:256] := (a >> (imm8[1:0] * 16))[271:256] +tmp_dst[287:272] := (a >> (imm8[3:2] * 16))[271:256] +tmp_dst[303:288] := (a >> (imm8[5:4] * 16))[271:256] +tmp_dst[319:304] := (a >> (imm8[7:6] * 16))[271:256] +tmp_dst[383:320] := a[383:320] +tmp_dst[399:384] := (a >> (imm8[1:0] * 16))[399:384] +tmp_dst[415:400] := (a >> (imm8[3:2] * 16))[399:384] +tmp_dst[431:416] := (a >> (imm8[5:4] * 16))[399:384] +tmp_dst[447:432] := (a >> (imm8[7:6] * 16))[399:384] +tmp_dst[511:448] := a[511:448] +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +tmp_dst[127:64] := a[127:64] +tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +tmp_dst[255:192] := a[255:192] +tmp_dst[271:256] := (a >> (imm8[1:0] * 16))[271:256] +tmp_dst[287:272] := (a >> (imm8[3:2] * 16))[271:256] +tmp_dst[303:288] := (a >> (imm8[5:4] * 16))[271:256] +tmp_dst[319:304] := (a >> (imm8[7:6] * 16))[271:256] +tmp_dst[383:320] := a[383:320] +tmp_dst[399:384] := (a >> (imm8[1:0] * 16))[399:384] +tmp_dst[415:400] := (a >> (imm8[3:2] * 16))[399:384] +tmp_dst[431:416] := (a >> (imm8[5:4] * 16))[399:384] +tmp_dst[447:432] := (a >> (imm8[7:6] * 16))[399:384] +tmp_dst[511:448] := a[511:448] +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst". + +dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] +dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] +dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] +dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] +dst[127:64] := a[127:64] +dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] +dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] +dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] +dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] +dst[255:192] := a[255:192] +dst[271:256] := (a >> (imm8[1:0] * 16))[271:256] +dst[287:272] := (a >> (imm8[3:2] * 16))[271:256] +dst[303:288] := (a >> (imm8[5:4] * 16))[271:256] +dst[319:304] := (a >> (imm8[7:6] * 16))[271:256] +dst[383:320] := a[383:320] +dst[399:384] := (a >> (imm8[1:0] * 16))[399:384] +dst[415:400] := (a >> (imm8[3:2] * 16))[399:384] +dst[431:416] := (a >> (imm8[5:4] * 16))[399:384] +dst[447:432] := (a >> (imm8[7:6] * 16))[399:384] +dst[511:448] := a[511:448] +dst[MAX:512] := 0 + + + AVX512BW
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512BW +
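Editorial note: a sketch of the high-half byte interleave above (conventionally _mm512_unpackhi_epi8; note it interleaves within each 128-bit lane, not across the full register). Illustrative only:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned char a[64], b[64], out[64];
    for (int j = 0; j < 64; j++) { a[j] = (unsigned char)j; b[j] = (unsigned char)(j + 100); }
    __m512i r = _mm512_unpackhi_epi8(_mm512_loadu_si512(a), _mm512_loadu_si512(b));
    _mm512_storeu_si512(out, r);
    printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]);  /* 8 108 9 109 */
    return 0;
}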
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384]) +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := tmp_dst[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384]) +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := tmp_dst[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Miscellaneous +
+ + + + + + Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + + Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + + + Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + + Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 32 packed 16-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512BW +
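Editorial note: a sketch of the unaligned 16-bit loads above, plain and zero-masked (conventionally _mm512_loadu_epi16 and _mm512_maskz_loadu_epi16; on older compilers that lack the typed name, _mm512_loadu_si512 behaves the same for the unmasked case). Illustrative only:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    short buf[32], out[32];
    for (int j = 0; j < 32; j++) buf[j] = (short)j;
    __m512i v = _mm512_loadu_epi16(buf);                    /* no alignment required */
    __m512i m = _mm512_maskz_loadu_epi16(0x000000FF, buf);  /* lanes 0..7 loaded, rest zeroed */
    _mm512_storeu_si512(out, m);
    printf("%d %d\n", out[7], out[8]);   /* prints: 7 0 */
    (void)v;
    return 0;
}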
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 64 packed 8-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Load +
+ + + + Load 32-bit mask from memory into "k". + +k[31:0] := MEM[mem_addr+31:mem_addr] + + + AVX512BW +
immintrin.h
+ Load +
+ + + + Load 64-bit mask from memory into "k". + +k[63:0] := MEM[mem_addr+63:mem_addr] + + + AVX512BW +
immintrin.h
+ Load +
+ + + + + + Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Move +
+ + + + + Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Move +
+ + + + + + Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Move +
+ + + + + Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Move +
+ + + + + + Store packed 16-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 31 + i := j*16 + IF k[j] + MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i] + FI +ENDFOR + + + AVX512BW +
immintrin.h
+ Store +
+ + + + + + Store packed 8-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 63 + i := j*8 + IF k[j] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + AVX512BW +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 32 packed 16-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512BW +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 64 packed 8-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512BW +
immintrin.h
+ Store +
+ + + + + Store 32-bit mask from "a" into memory. + +MEM[mem_addr+31:mem_addr] := a[31:0] + + + AVX512BW +
immintrin.h
+ Store +
+ + + + + Store 64-bit mask from "a" into memory. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + AVX512BW +
immintrin.h
+ Store +
+ + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := ABS(a[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
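Editorial note: a sketch of the byte absolute value above (conventionally _mm512_abs_epi8). Illustrative only:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i v = _mm512_abs_epi8(_mm512_set1_epi8(-5));
    unsigned char out[64];
    _mm512_storeu_si512(out, v);
    printf("%u\n", out[0]);   /* 5; note ABS(-128) stays 0x80 (128) in the unsigned result */
    return 0;
}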
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := ABS(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ABS(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ABS(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := a[i+7:i] + b[i+7:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[i+7:i] + b[i+7:i] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := a[i+15:i] + b[i+15:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[i+15:i] + b[i+15:i] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
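Editorial note: per the (a + b + 1) >> 1 formula above, the average rounds up on ties. A sketch (conventionally _mm512_avg_epu8), illustrative only:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi8(10), b = _mm512_set1_epi8(11);
    unsigned char out[64];
    _mm512_storeu_si512(out, _mm512_avg_epu8(a, b));
    printf("%u\n", out[0]);   /* (10+11+1)>>1 = 11, rounded up */
    return 0;
}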
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
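Editorial note: a sketch of the mixed-signedness multiply-add above (conventionally _mm512_maddubs_epi16; "a" bytes are unsigned, "b" bytes signed, adjacent products summed with saturation). Illustrative only:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi8(2);    /* treated as unsigned */
    __m512i b = _mm512_set1_epi8(-3);   /* treated as signed */
    short out[32];
    _mm512_storeu_si512(out, _mm512_maddubs_epi16(a, b));
    printf("%d\n", out[0]);   /* 2*(-3) + 2*(-3) = -12 per word */
    return 0;
}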
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Arithmetic +
AVX512BW / immintrin.h / Arithmetic

Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*16
    tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
    dst[i+15:i] := tmp[15:0]
ENDFOR
dst[MAX:512] := 0

In the masked forms, the loop body runs only IF k[j]; otherwise dst[i+15:i] := src[i+15:i] (writemask) or dst[i+15:i] := 0 (zeromask).
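The low and high halves together recover the full 32-bit product. A sketch, assuming the names _mm512_mullo_epi16 and _mm512_mulhi_epi16 for the entries above:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi16(1000);
    __m512i b = _mm512_set1_epi16(1000);
    short lo[32], hi[32];
    _mm512_storeu_si512(lo, _mm512_mullo_epi16(a, b)); // low 16 bits per lane
    _mm512_storeu_si512(hi, _mm512_mulhi_epi16(a, b)); // high 16 bits per lane
    int full = ((int)hi[0] << 16) | (unsigned short)lo[0];
    printf("%d\n", full);                              // prints 1000000
    return 0;
}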
AVX512BW / immintrin.h / Arithmetic

Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 63
    i := j*8
    dst[i+7:i] := a[i+7:i] - b[i+7:i]
ENDFOR
dst[MAX:512] := 0

In the masked forms, the assignment runs only IF k[j]; otherwise dst[i+7:i] := src[i+7:i] (writemask) or dst[i+7:i] := 0 (zeromask).
AVX512BW / immintrin.h / Arithmetic

Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 63
    i := j*8
    dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])
ENDFOR
dst[MAX:512] := 0

In the masked forms, the assignment runs only IF k[j]; otherwise dst[i+7:i] := src[i+7:i] (writemask) or dst[i+7:i] := 0 (zeromask).
AVX512BW / immintrin.h / Arithmetic

Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*16
    dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
ENDFOR
dst[MAX:512] := 0

In the masked forms, the assignment runs only IF k[j]; otherwise dst[i+15:i] := src[i+15:i] (writemask) or dst[i+15:i] := 0 (zeromask).
AVX512BW / immintrin.h / Arithmetic

Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 63
    i := j*8
    dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])
ENDFOR
dst[MAX:512] := 0

In the masked forms, the assignment runs only IF k[j]; otherwise dst[i+7:i] := src[i+7:i] (writemask) or dst[i+7:i] := 0 (zeromask).
AVX512BW / immintrin.h / Arithmetic

Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*16
    dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])
ENDFOR
dst[MAX:512] := 0

In the masked forms, the assignment runs only IF k[j]; otherwise dst[i+15:i] := src[i+15:i] (writemask) or dst[i+15:i] := 0 (zeromask).
AVX512BW / immintrin.h / Arithmetic

Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*16
    dst[i+15:i] := a[i+15:i] - b[i+15:i]
ENDFOR
dst[MAX:512] := 0

In the masked forms, the assignment runs only IF k[j]; otherwise dst[i+15:i] := src[i+15:i] (writemask) or dst[i+15:i] := 0 (zeromask).
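Wrapping versus saturating subtraction, sketched in C under the assumption that the entries above correspond to _mm512_sub_epi8 and _mm512_subs_epu8:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi8(10);
    __m512i b = _mm512_set1_epi8(20);
    unsigned char wrap[64], sat[64];
    _mm512_storeu_si512(wrap, _mm512_sub_epi8(a, b));  // 10 - 20 wraps to 246
    _mm512_storeu_si512(sat, _mm512_subs_epu8(a, b));  // unsigned saturation clamps to 0
    printf("%u %u\n", wrap[0], sat[0]);                // prints 246 0
    return 0;
}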
AVX512BW / immintrin.h / Miscellaneous / Convert

Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Within each 128-bit lane, the low 64 bits of the result come from "a" and the high 64 bits from "b":

FOR l := 0 to 3
    base := l*128
    FOR h := 0 to 3
        tmp_dst[base+h*16+15:base+h*16] := Saturate16(a[base+h*32+31:base+h*32])
        tmp_dst[base+64+h*16+15:base+64+h*16] := Saturate16(b[base+h*32+31:base+h*32])
    ENDFOR
ENDFOR
FOR j := 0 to 31
    i := j*16
    dst[i+15:i] := tmp_dst[i+15:i]
ENDFOR
dst[MAX:512] := 0

In the masked forms, the final copy runs only IF k[j]; otherwise dst[i+15:i] := src[i+15:i] (writemask) or dst[i+15:i] := 0 (zeromask).
AVX512BW / immintrin.h / Miscellaneous / Convert

Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Within each 128-bit lane, the low 64 bits of the result come from "a" and the high 64 bits from "b":

FOR l := 0 to 3
    base := l*128
    FOR h := 0 to 7
        tmp_dst[base+h*8+7:base+h*8] := Saturate8(a[base+h*16+15:base+h*16])
        tmp_dst[base+64+h*8+7:base+64+h*8] := Saturate8(b[base+h*16+15:base+h*16])
    ENDFOR
ENDFOR
FOR j := 0 to 63
    i := j*8
    dst[i+7:i] := tmp_dst[i+7:i]
ENDFOR
dst[MAX:512] := 0

In the masked forms, the final copy runs only IF k[j]; otherwise dst[i+7:i] := src[i+7:i] (writemask) or dst[i+7:i] := 0 (zeromask).
AVX512BW / immintrin.h / Miscellaneous / Convert

Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Within each 128-bit lane, the low 64 bits of the result come from "a" and the high 64 bits from "b":

FOR l := 0 to 3
    base := l*128
    FOR h := 0 to 3
        tmp_dst[base+h*16+15:base+h*16] := SaturateU16(a[base+h*32+31:base+h*32])
        tmp_dst[base+64+h*16+15:base+64+h*16] := SaturateU16(b[base+h*32+31:base+h*32])
    ENDFOR
ENDFOR
FOR j := 0 to 31
    i := j*16
    dst[i+15:i] := tmp_dst[i+15:i]
ENDFOR
dst[MAX:512] := 0

In the masked forms, the final copy runs only IF k[j]; otherwise dst[i+15:i] := src[i+15:i] (writemask) or dst[i+15:i] := 0 (zeromask).
AVX512BW / immintrin.h / Miscellaneous / Convert

Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Within each 128-bit lane, the low 64 bits of the result come from "a" and the high 64 bits from "b":

FOR l := 0 to 3
    base := l*128
    FOR h := 0 to 7
        tmp_dst[base+h*8+7:base+h*8] := SaturateU8(a[base+h*16+15:base+h*16])
        tmp_dst[base+64+h*8+7:base+64+h*8] := SaturateU8(b[base+h*16+15:base+h*16])
    ENDFOR
ENDFOR
FOR j := 0 to 63
    i := j*8
    dst[i+7:i] := tmp_dst[i+7:i]
ENDFOR
dst[MAX:512] := 0

In the masked forms, the final copy runs only IF k[j]; otherwise dst[i+7:i] := src[i+7:i] (writemask) or dst[i+7:i] := 0 (zeromask).
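Because the pack operates within 128-bit lanes, the output interleaves words from "a" and "b" per lane rather than concatenating all of "a" with all of "b". A sketch, assuming the unmasked 32-to-16 entry is _mm512_packs_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    int av[16], bv[16];
    for (int i = 0; i < 16; i++) { av[i] = i; bv[i] = 100 + i; }
    __m512i a = _mm512_loadu_si512(av);
    __m512i b = _mm512_loadu_si512(bv);
    short out[32];
    _mm512_storeu_si512(out, _mm512_packs_epi32(a, b));
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);
    printf("\n");  // prints 0 1 2 3 100 101 102 103 (first 128-bit lane)
    return 0;
}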
AVX512BW / immintrin.h / Convert

Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := 16*j
    l := 8*j
    dst[l+7:l] := Saturate8(a[i+15:i])
ENDFOR
dst[MAX:256] := 0

In the masked forms, each element is written only IF k[j]; otherwise dst[l+7:l] := src[l+7:l] (writemask) or dst[l+7:l] := 0 (zeromask).

A store form (category Store) writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr":

FOR j := 0 to 31
    i := 16*j
    l := 8*j
    IF k[j]
        MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i])
    FI
ENDFOR
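The store form is useful for writing a narrowed vector straight to memory. A sketch, assuming the store entry is _mm512_mask_cvtsepi16_storeu_epi8:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi16(300);  // saturates to 127 as a signed byte
    unsigned char buf[32] = {0};
    __mmask32 k = 0x0000FFFF;            // write only the first 16 bytes
    _mm512_mask_cvtsepi16_storeu_epi8(buf, k, a);
    printf("%d %u\n", (signed char)buf[0], buf[31]);  // prints 127 0
    return 0;
}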
AVX512BW / immintrin.h / Convert

Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    l := j*16
    dst[l+15:l] := SignExtend16(a[i+7:i])
ENDFOR
dst[MAX:512] := 0

In the masked forms, each element is written only IF k[j]; otherwise dst[l+15:l] := src[l+15:l] (writemask) or dst[l+15:l] := 0 (zeromask).
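A sign-extension sketch; the source is a 256-bit vector of 32 bytes widening into a full 512-bit result, assuming the unmasked entry is _mm512_cvtepi8_epi16:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi8(-5);
    short out[32];
    _mm512_storeu_si512(out, _mm512_cvtepi8_epi16(a));
    printf("%d\n", out[0]);  // prints -5 (sign bit replicated into the high byte)
    return 0;
}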
AVX512BW / immintrin.h / Convert

Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := 16*j
    l := 8*j
    dst[l+7:l] := SaturateU8(a[i+15:i])
ENDFOR
dst[MAX:256] := 0

In the masked forms, each element is written only IF k[j]; otherwise dst[l+7:l] := src[l+7:l] (writemask) or dst[l+7:l] := 0 (zeromask).

A store form (category Store) writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr":

FOR j := 0 to 31
    i := 16*j
    l := 8*j
    IF k[j]
        MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i])
    FI
ENDFOR
AVX512BW / immintrin.h / Convert

Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := 16*j
    l := 8*j
    dst[l+7:l] := Truncate8(a[i+15:i])
ENDFOR
dst[MAX:256] := 0

In the masked forms, each element is written only IF k[j]; otherwise dst[l+7:l] := src[l+7:l] (writemask) or dst[l+7:l] := 0 (zeromask).

A store form (category Store) writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr":

FOR j := 0 to 31
    i := 16*j
    l := 8*j
    IF k[j]
        MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i])
    FI
ENDFOR
AVX512BW / immintrin.h / Convert

Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". Available unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    l := j*16
    dst[l+15:l] := ZeroExtend16(a[i+7:i])
ENDFOR
dst[MAX:512] := 0

In the masked forms, each element is written only IF k[j]; otherwise dst[l+15:l] := src[l+15:l] (writemask) or dst[l+15:l] := 0 (zeromask).
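Zero extension differs from the sign extension above exactly on bytes with the top bit set. A sketch, assuming the names _mm512_cvtepu8_epi16 and _mm512_cvtepi8_epi16:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi8((char)0xFF);
    short z[32], s[32];
    _mm512_storeu_si512(z, _mm512_cvtepu8_epi16(a));  // zero extend: 0xFF -> 255
    _mm512_storeu_si512(s, _mm512_cvtepi8_epi16(a));  // sign extend: 0xFF -> -1
    printf("%d %d\n", z[0], s[0]);                    // prints 255 -1
    return 0;
}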
AVX512BW / immintrin.h / Set

Broadcast 8-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 63
    i := j*8
    IF k[j]
        dst[i+7:i] := a[7:0]
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:512] := 0

The zeromask form sets dst[i+7:i] := 0 in the ELSE branch.
AVX512BW / immintrin.h / Set

Broadcast the low 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*16
    IF k[j]
        dst[i+15:i] := a[15:0]
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:512] := 0

The zeromask form sets dst[i+15:i] := 0 in the ELSE branch.
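Masked broadcast merges a scalar into selected lanes of an existing vector. A sketch, assuming the 8-bit writemask entry is _mm512_mask_set1_epi8:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i src = _mm512_set1_epi8(7);
    __mmask64 k = 1;                    // select element 0 only
    unsigned char out[64];
    _mm512_storeu_si512(out, _mm512_mask_set1_epi8(src, k, 42));
    printf("%u %u\n", out[0], out[1]);  // prints 42 7
    return 0;
}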
AVX512BW / immintrin.h / Compare

Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".

CASE (imm8[2:0]) OF
0: OP := _MM_CMPINT_EQ
1: OP := _MM_CMPINT_LT
2: OP := _MM_CMPINT_LE
3: OP := _MM_CMPINT_FALSE
4: OP := _MM_CMPINT_NE
5: OP := _MM_CMPINT_NLT
6: OP := _MM_CMPINT_NLE
7: OP := _MM_CMPINT_TRUE
ESAC
FOR j := 0 to 63
    i := j*8
    k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
ENDFOR
k[MAX:64] := 0

Fixed-predicate forms are provided for equality (==), greater-than-or-equal (>=), greater-than (>), less-than-or-equal (<=), less-than (<), and not-equal (!=); each applies the given operator in place of OP. Every form also has a variant taking an additional zeromask "k1": k[j] is computed only IF k1[j] and is set to 0 otherwise.
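Compare results land in a mask register, so counting matches is a population count on the mask. A sketch, assuming the greater-than entry is _mm512_cmpgt_epi8_mask (__builtin_popcountll is a GCC/Clang builtin):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi8(3);
    __m512i b = _mm512_set1_epi8(1);
    __mmask64 m = _mm512_cmpgt_epi8_mask(a, b);  // 3 > 1 in all 64 lanes
    printf("%d\n", __builtin_popcountll(m));     // prints 64
    return 0;
}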
AVX512BW / immintrin.h / Compare

Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".

CASE (imm8[2:0]) OF
0: OP := _MM_CMPINT_EQ
1: OP := _MM_CMPINT_LT
2: OP := _MM_CMPINT_LE
3: OP := _MM_CMPINT_FALSE
4: OP := _MM_CMPINT_NE
5: OP := _MM_CMPINT_NLT
6: OP := _MM_CMPINT_NLE
7: OP := _MM_CMPINT_TRUE
ESAC
FOR j := 0 to 63
    i := j*8
    k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
ENDFOR
k[MAX:64] := 0

Fixed-predicate forms are provided for equality (==), greater-than-or-equal (>=), greater-than (>), less-than-or-equal (<=), less-than (<), and not-equal (!=); each applies the given operator in place of OP. Every form also has a variant taking an additional zeromask "k1": k[j] is computed only IF k1[j] and is set to 0 otherwise.
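Because the masked compare only reports lanes already set in "k1", two compares chain into a range test. A sketch, assuming the entries are _mm512_cmpgt_epu8_mask and _mm512_mask_cmplt_epu8_mask:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i x  = _mm512_set1_epi8(50);
    __m512i lo = _mm512_set1_epi8(10);
    __m512i hi = _mm512_set1_epi8(100);
    __mmask64 m = _mm512_cmpgt_epu8_mask(x, lo);  // x > 10
    m = _mm512_mask_cmplt_epu8_mask(m, x, hi);    // ... AND x < 100
    printf("%d\n", __builtin_popcountll(m));      // prints 64 (GCC/Clang builtin)
    return 0;
}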
AVX512BW / immintrin.h / Compare

Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".

CASE (imm8[2:0]) OF
0: OP := _MM_CMPINT_EQ
1: OP := _MM_CMPINT_LT
2: OP := _MM_CMPINT_LE
3: OP := _MM_CMPINT_FALSE
4: OP := _MM_CMPINT_NE
5: OP := _MM_CMPINT_NLT
6: OP := _MM_CMPINT_NLE
7: OP := _MM_CMPINT_TRUE
ESAC
FOR j := 0 to 31
    i := j*16
    k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
ENDFOR
k[MAX:32] := 0

Fixed-predicate forms are provided for equality (==), greater-than-or-equal (>=), greater-than (>), less-than-or-equal (<=), less-than (<), and not-equal (!=); each applies the given operator in place of OP. Every form also has a variant taking an additional zeromask "k1": k[j] is computed only IF k1[j] and is set to 0 otherwise.
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 31 + i := j*16 + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k1[j] + k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. 
+
+FOR j := 0 to 63
+    i := j*8
+    IF k1[j]
+        k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0
+    ELSE
+        k[j] := 0
+    FI
+ENDFOR
+k[MAX:64] := 0
+
+
+ AVX512BW
+
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 63 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
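The test/testn family above reduces a bitwise AND to one mask bit per element. A short sketch, under the assumption that this entry is _mm512_test_epi8_mask (the name attribute is not visible in this extract):

/* Sketch only: assumes _mm512_test_epi8_mask (AVX512BW). */
#include <immintrin.h>

/* Bit j of the result is 1 iff (a[j] AND b[j]) != 0, so testing
   against 0x0F flags bytes with any low-nibble bit set. */
__mmask64 bytes_with_low_nibble_bits(__m512i bytes) {
    return _mm512_test_epi8_mask(bytes, _mm512_set1_epi8(0x0F));
}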
+ + + + + + Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. 
+
+FOR j := 0 to 31
+    i := j*16
+    IF k1[j]
+        k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0
+    ELSE
+        k[j] := 0
+    FI
+ENDFOR
+k[MAX:32] := 0
+
+
+ AVX512BW
+
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 31 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. 
+
+FOR j := 0 to 63
+    i := j*8
+    IF k1[j]
+        k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0
+    ELSE
+        k[j] := 0
+    FI
+ENDFOR
+k[MAX:64] := 0
+
+
+ AVX512BW
+
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 63 + i := j*8 + k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. 
+
+FOR j := 0 to 31
+    i := j*16
+    IF k1[j]
+        k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0
+    ELSE
+        k[j] := 0
+    FI
+ENDFOR
+k[MAX:32] := 0
+
+
+ AVX512BW
+
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 31 + i := j*16 + k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Compare +
+ + + + + Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] << (tmp*8) +dst[255:128] := a[255:128] << (tmp*8) +dst[383:256] := a[383:256] << (tmp*8) +dst[511:384] := a[511:384] << (tmp*8) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
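A sketch of the byte-granular lane shift above, assuming it is _mm512_bslli_epi128; note the clamp of imm8 to 16 in the pseudocode, which zeroes each 128-bit lane for counts above 15.

/* Sketch only: assumes _mm512_bslli_epi128 (AVX512BW); the byte
   count must be a compile-time constant. */
#include <immintrin.h>

__m512i shift_lanes_left_4_bytes(__m512i a) {
    /* Each of the four 128-bit lanes shifts left by 4 bytes
       independently; bytes never cross a lane boundary. */
    return _mm512_bslli_epi128(a, 4);
}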
+ + + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
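The variable-shift form above takes a per-element count. A sketch assuming the name _mm512_sllv_epi16:

/* Sketch only: assumes _mm512_sllv_epi16 (AVX512BW). */
#include <immintrin.h>

__m512i shift_each_lane(__m512i a, __m512i counts) {
    /* Every 16-bit lane shifts by its own count; a count of 16 or
       more zeroes that lane, per the "count < 16" guard above. */
    return _mm512_sllv_epi16(a, counts);
}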
+ + + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
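The arithmetic shifts above replicate the sign bit instead of shifting in zeros. A sketch assuming the immediate form is _mm512_srai_epi16:

/* Sketch only: assumes _mm512_srai_epi16 (AVX512BW). */
#include <immintrin.h>

__m512i div_by_4_toward_neg_inf(__m512i a) {
    /* Sign bits are replicated, so -8 >> 2 == -2, and shifts of 16
       or more collapse each lane to 0x0000 or 0xFFFF. */
    return _mm512_srai_epi16(a, 2);
}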
+ + + + + Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] >> (tmp*8) +dst[255:128] := a[255:128] >> (tmp*8) +dst[383:256] := a[383:256] >> (tmp*8) +dst[511:384] := a[511:384] >> (tmp*8) +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[i+15:i] < 16 + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512BW +
immintrin.h
+ Shift +
+ + + + + Add 32-bit masks in "a" and "b", and store the result in "k". + +k[31:0] := a[31:0] + b[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
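The mask operations in this block are plain integer operations on the k-registers. A sketch assuming the entry above is _kadd_mask32:

/* Sketch only: assumes _kadd_mask32 (AVX512BW). */
#include <immintrin.h>

__mmask32 mask_sum(__mmask32 a, __mmask32 b) {
    /* Ordinary 32-bit addition of two mask registers; any carry
       out of bit 31 is dropped (k[MAX:32] := 0). */
    return _kadd_mask32(a, b);
}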
+ + + + + Add 64-bit masks in "a" and "b", and store the result in "k". + +k[63:0] := a[63:0] + b[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 32-bit masks "a" and "b", and store the result in "k". + +k[31:0] := a[31:0] AND b[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 64-bit masks "a" and "b", and store the result in "k". + +k[63:0] := a[63:0] AND b[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 32-bit masks "a" and then AND with "b", and store the result in "k". + +k[31:0] := (NOT a[31:0]) AND b[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 64-bit masks "a" and then AND with "b", and store the result in "k". + +k[63:0] := (NOT a[63:0]) AND b[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Compute the bitwise NOT of 32-bit mask "a", and store the result in "k". + +k[31:0] := NOT a[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Compute the bitwise NOT of 64-bit mask "a", and store the result in "k". + +k[63:0] := NOT a[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 32-bit masks "a" and "b", and store the result in "k". + +k[31:0] := a[31:0] OR b[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 64-bit masks "a" and "b", and store the result in "k". + +k[63:0] := a[63:0] OR b[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XNOR of 32-bit masks "a" and "b", and store the result in "k". + +k[31:0] := NOT (a[31:0] XOR b[31:0]) +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XNOR of 64-bit masks "a" and "b", and store the result in "k". + +k[63:0] := NOT (a[63:0] XOR b[63:0]) +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XOR of 32-bit masks "a" and "b", and store the result in "k". + +k[31:0] := a[31:0] XOR b[31:0] +k[MAX:32] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XOR of 64-bit masks "a" and "b", and store the result in "k". + +k[63:0] := a[63:0] XOR b[63:0] +k[MAX:64] := 0 + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 32-bit mask "a" left by "count" while shifting in zeros, and store the least significant 32 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 31 + k[31:0] := a[31:0] << count[7:0] +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 64-bit mask "a" left by "count" while shifting in zeros, and store the least significant 64 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 63 + k[63:0] := a[63:0] << count[7:0] +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 32-bit mask "a" right by "count" while shifting in zeros, and store the least significant 32 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 31 + k[31:0] := a[31:0] >> count[7:0] +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 64-bit mask "a" right by "count" while shifting in zeros, and store the least significant 64 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 63 + k[63:0] := a[63:0] >> count[7:0] +FI + + + AVX512BW +
immintrin.h
+ Mask +
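Because out-of-range counts yield an all-zero mask (the "count[7:0] <= 31" guard), the two shift directions compose into a rotate. A sketch assuming the names _kshiftli_mask32, _kshiftri_mask32, and _kor_mask32:

/* Sketch only: assumes _kshiftli_mask32, _kshiftri_mask32, and
   _kor_mask32 (AVX512BW); shift counts are compile-time constants. */
#include <immintrin.h>

__mmask32 rotate_mask_left_1(__mmask32 k) {
    return _kor_mask32(_kshiftli_mask32(k, 1), _kshiftri_mask32(k, 31));
}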
+ + + + + + Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". + +tmp[31:0] := a[31:0] OR b[31:0] +IF tmp[31:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +IF tmp[31:0] == 0xFFFFFFFF + MEM[all_ones+7:all_ones] := 1 +ELSE + MEM[all_ones+7:all_ones] := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[31:0] := a[31:0] OR b[31:0] +IF tmp[31:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". + +tmp[31:0] := a[31:0] OR b[31:0] +IF tmp[31:0] == 0xFFFFFFFF + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". 
+
+tmp[63:0] := a[63:0] OR b[63:0]
+IF tmp[63:0] == 0x0
+    dst := 1
+ELSE
+    dst := 0
+FI
+IF tmp[63:0] == 0xFFFFFFFFFFFFFFFF
+    MEM[all_ones+7:all_ones] := 1
+ELSE
+    MEM[all_ones+7:all_ones] := 0
+FI
+
+
+ AVX512BW
+
immintrin.h
+ Mask +
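A sketch of the combined zero/ones test above (with the all-ones check reading tmp[63:0], as corrected), assuming the name _kortest_mask64_u8:

/* Sketch only: assumes _kortest_mask64_u8 (AVX512BW). */
#include <immintrin.h>
#include <stdio.h>

void classify(__mmask64 a, __mmask64 b) {
    unsigned char all_ones;
    unsigned char all_zeros = _kortest_mask64_u8(a, b, &all_ones);
    if (all_zeros) puts("a OR b is all zeros");
    if (all_ones)  puts("a OR b is all ones");
}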
+ + + + + Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[63:0] := a[63:0] OR b[63:0] +IF tmp[63:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". + +tmp[63:0] := a[63:0] OR b[63:0] +IF tmp[63:0] == 0xFFFFFFFFFFFFFFFF + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise AND of 32-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". + +tmp1[31:0] := a[31:0] AND b[31:0] +IF tmp1[31:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +tmp2[31:0] := (NOT a[31:0]) AND b[31:0] +IF tmp2[31:0] == 0x0 + MEM[and_not+7:and_not] := 1 +ELSE + MEM[and_not+7:and_not] := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 32-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". + +tmp[31:0] := a[31:0] AND b[31:0] +IF tmp[31:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 32-bit mask "a" and then AND with "b", if the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[31:0] := (NOT a[31:0]) AND b[31:0] +IF tmp[31:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise AND of 64-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". + +tmp1[63:0] := a[63:0] AND b[63:0] +IF tmp1[63:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +tmp2[63:0] := (NOT a[63:0]) AND b[63:0] +IF tmp2[63:0] == 0x0 + MEM[and_not+7:and_not] := 1 +ELSE + MEM[and_not+7:and_not] := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 64-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". + +tmp[63:0] := a[63:0] AND b[63:0] +IF tmp[63:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 64-bit mask "a" and then AND with "b", if the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[63:0] := (NOT a[63:0]) AND b[63:0] +IF tmp[63:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Convert 32-bit mask "a" into an integer value, and store the result in "dst". + +dst := ZeroExtend32(a[31:0]) + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Convert 64-bit mask "a" into an integer value, and store the result in "dst". + +dst := ZeroExtend64(a[63:0]) + + + AVX512BW +
immintrin.h
+ Mask +
+ + + + Convert integer value "a" into a 32-bit mask, and store the result in "k". 
+
+k := ZeroExtend32(a[31:0])
+
+
+ AVX512BW
+
immintrin.h
+ Mask +
+ + + + Convert integer value "a" into a 64-bit mask, and store the result in "k". 
+
+k := ZeroExtend64(a[63:0])
+
+
+ AVX512BW
+
immintrin.h
+ Mask +
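The two conversion directions above are lossless zero-extensions. A round-trip sketch assuming the names _cvtu32_mask32 and _cvtmask32_u32:

/* Sketch only: assumes _cvtu32_mask32 / _cvtmask32_u32 (AVX512BW). */
#include <immintrin.h>
#include <assert.h>

void round_trip(void) {
    __mmask32 k = _cvtu32_mask32(0xF0F0F0F0u); /* integer -> mask */
    unsigned int v = _cvtmask32_u32(k);        /* mask -> integer */
    assert(v == 0xF0F0F0F0u);
}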
+ + + + + + Broadcast the low 8 bits from input mask "k" to all 64-bit elements of "dst". 
+
+FOR j := 0 to 3
+    i := j*64
+    dst[i+63:i] := ZeroExtend64(k[7:0])
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512CD
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the low 8 bits from input mask "k" to all 64-bit elements of "dst". 
+
+FOR j := 0 to 1
+    i := j*64
+    dst[i+63:i] := ZeroExtend64(k[7:0])
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512CD
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the low 16 bits from input mask "k" to all 32-bit elements of "dst". 
+
+FOR j := 0 to 7
+    i := j*32
+    dst[i+31:i] := ZeroExtend32(k[15:0])
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512CD
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the low 16 bits from input mask "k" to all 32-bit elements of "dst". 
+
+FOR j := 0 to 3
+    i := j*32
+    dst[i+31:i] := ZeroExtend32(k[15:0])
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512CD
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*32 + FOR k := 0 to j-1 + m := k*32 + dst[i+k] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
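The conflict-detection entries report, per element, a bitmap of lower-indexed equal elements; this is the building block for vectorizing scatters with duplicate indices. A sketch assuming the entry above is _mm256_conflict_epi32:

/* Sketch only: assumes _mm256_conflict_epi32 (AVX512CD + AVX512VL). */
#include <immintrin.h>

__m256i find_duplicates(void) {
    /* Indices 0..7 hold {3,5,3,9,7,7,9,3}: lane 2 reports bit 0
       (equal to lane 0), lane 5 reports bit 4, lane 6 reports bit 3,
       and lane 7 reports bits 0 and 2, i.e. {0,0,1,0,0,16,8,5}. */
    __m256i v = _mm256_setr_epi32(3, 5, 3, 9, 7, 7, 9, 3);
    return _mm256_conflict_epi32(v);
}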
+ + + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*32 + FOR k := 0 to j-1 + m := k*32 + dst[i+k] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*64 + FOR k := 0 to j-1 + m := k*64 + dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 3 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 1 + i := j*64 + FOR k := 0 to j-1 + m := k*64 + dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 1 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 1 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Compare +
+ + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512CD + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Broadcast the low 8 bits from input mask "k" to all 64-bit elements of "dst". 
+
+FOR j := 0 to 7
+    i := j*64
+    dst[i+63:i] := ZeroExtend64(k[7:0])
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512CD
+
immintrin.h
+ Swizzle +
+ + + + Broadcast the low 16 bits from input mask "k" to all 32-bit elements of "dst". 
+
+FOR j := 0 to 15
+    i := j*32
+    dst[i+31:i] := ZeroExtend32(k[15:0])
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512CD
+
immintrin.h
+ Swizzle +
+ + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 15 + i := j*32 + FOR k := 0 to j-1 + m := k*32 + dst[i+k] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + + Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + FOR l := 0 to j-1 + m := l*32 + dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 + ENDFOR + dst[i+31:i+j] := 0 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*64 + FOR k := 0 to j-1 + m := k*64 + dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + + Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". + +FOR j := 0 to 7 + i := j*64 + IF k[j] + FOR l := 0 to j-1 + m := l*64 + dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 + ENDFOR + dst[i+63:i+j] := 0 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Compare +
+ + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
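Per-element leading-zero counts give a vectorized floor(log2). A sketch assuming the entry above is _mm512_lzcnt_epi32:

/* Sketch only: assumes _mm512_lzcnt_epi32 (AVX512CD). */
#include <immintrin.h>

__m512i floor_log2(__m512i v) {
    /* lzcnt(x) == 31 - floor(log2(x)) for x != 0 (a zero lane
       counts 32, so its result here would be -1). */
    return _mm512_sub_epi32(_mm512_set1_epi32(31), _mm512_lzcnt_epi32(v));
}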
+ + + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp := 31 + dst[i+31:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+31:i] := dst[i+31:i] + 1 + OD + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + + Counts the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp := 63 + dst[i+63:i] := 0 + DO WHILE (tmp >= 0 AND a[i+tmp] == 0) + tmp := tmp - 1 + dst[i+63:i] := dst[i+63:i] + 1 + OD + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512CD +
immintrin.h
+ Bit Manipulation +
+ + + + + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
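ANDNOT with a sign-bit mask is the classic absolute-value idiom, and the masked form above additionally merges from "src". A sketch assuming the entry is _mm256_mask_andnot_pd:

/* Sketch only: assumes _mm256_mask_andnot_pd (AVX512DQ + AVX512VL). */
#include <immintrin.h>

__m256d abs_where_masked(__m256d src, __mmask8 k, __m256d x) {
    /* (NOT sign_bit) AND x clears the sign, i.e. fabs(x); lanes
       with a clear mask bit keep the value from src. */
    return _mm256_mask_andnot_pd(src, k, _mm256_set1_pd(-0.0), x);
}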
+ + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Logical +
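All of the masked AND/OR/XOR entries above share one pattern: writemask forms take a "src" vector that supplies the unselected lanes, while zeromask forms zero them. A hedged sketch, assuming the conventional names _mm256_mask_and_pd and _mm256_maskz_xor_ps (the extracted entries omit the intrinsic names):

#include <immintrin.h>

// Writemask: lanes with k[j] == 0 are copied from src.
__m256d masked_and(__m256d src, __mmask8 k, __m256d a, __m256d b) {
    return _mm256_mask_and_pd(src, k, a, b);
}

// Zeromask: lanes with k[j] == 0 become 0.0f.
__m256 zeromasked_xor(__mmask8 k, __m256 a, __m256 b) {
    return _mm256_maskz_xor_ps(k, a, b);
}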
+ + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
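A short usage sketch for the two-element float/double broadcasts above, assuming the standard names _mm256_broadcast_f32x2 and _mm256_broadcast_f64x2 (not present in the extracted entries):

#include <immintrin.h>

// {a0, a1} is repeated across the whole 256-bit destination.
__m256  rep_f32x2(__m128 a)  { return _mm256_broadcast_f32x2(a); }  // a0,a1,a0,a1,a0,a1,a0,a1
__m256d rep_f64x2(__m128d a) { return _mm256_broadcast_f64x2(a); }  // a0,a1,a0,a1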
+ + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst". 
+
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 2)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:256] := 0
+ + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst". 
+
+FOR j := 0 to 3
+	i := j*32
+	n := (j % 2)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:128] := 0
+ + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
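The integer broadcasts follow the same pattern; a sketch assuming the names _mm256_broadcast_i32x2, _mm_broadcast_i32x2, and _mm256_broadcast_i64x2:

#include <immintrin.h>

__m256i rep_i32x2_256(__m128i a) { return _mm256_broadcast_i32x2(a); } // low 2 dwords, repeated 4x
__m128i rep_i32x2_128(__m128i a) { return _mm_broadcast_i32x2(a); }    // low 2 dwords, repeated 2x
__m256i rep_i64x2(__m128i a)     { return _mm256_broadcast_i64x2(a); } // both qwords, repeated 2x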
+ + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
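A sketch of the 128-bit extracts above, assuming the names _mm256_extractf64x2_pd and _mm256_extracti64x2_epi64; imm8 must be an immediate (compile-time constant) selecting the low (0) or high (1) half:

#include <immintrin.h>

__m128d high_pd(__m256d a)  { return _mm256_extractf64x2_pd(a, 1); }
__m128i high_i64(__m256i a) { return _mm256_extracti64x2_epi64(a, 1); }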
+ + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 3 + i := j*64 + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 1 + i := j*64 + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) +ENDFOR +k[MAX:2] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 7 + i := j*32 + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 3 + i := j*32 + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
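A sketch of the fpclass tests above, assuming the name _mm256_fpclass_pd_mask. The imm8 category bits below follow the usual VFPCLASS encoding (QNaN 0x01, +0 0x02, -0 0x04, +Inf 0x08, -Inf 0x10, denormal 0x20, finite negative 0x40, SNaN 0x80); see [fpclass_note] for the authoritative table:

#include <immintrin.h>

// One mask bit per element; set if the element is any kind of NaN or infinity.
__mmask8 nan_or_inf(__m256d a) {
    return _mm256_fpclass_pd_mask(a, 0x01 | 0x80 | 0x08 | 0x10);
}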
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE imm8[0] OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE imm8[0] OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
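A sketch of the 128-bit inserts above, assuming the names _mm256_insertf64x2 and _mm256_inserti64x2; imm8 = 1 replaces dst[255:128], imm8 = 0 replaces dst[127:0]:

#include <immintrin.h>

__m256d replace_high(__m256d a, __m128d b)   { return _mm256_insertf64x2(a, b, 1); }
__m256i replace_high_i(__m256i a, __m128i b) { return _mm256_inserti64x2(a, b, 1); }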
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a". + +FOR j := 0 to 7 + i := j*32 + IF a[i+31] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a". + +FOR j := 0 to 3 + i := j*32 + IF a[i+31] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := 0xFFFFFFFF + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := 0xFFFFFFFF + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := 0xFFFFFFFFFFFFFFFF + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := 0xFFFFFFFFFFFFFFFF + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a". + +FOR j := 0 to 3 + i := j*64 + IF a[i+63] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a". + +FOR j := 0 to 1 + i := j*64 + IF a[i+63] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
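The entries above convert between vectors and mask registers in both directions. A sketch assuming the names _mm256_movepi32_mask and _mm256_movm_epi32:

#include <immintrin.h>

// Vector -> mask: one bit per element, taken from each element's sign bit.
__mmask8 sign_bits(__m256i a) { return _mm256_movepi32_mask(a); }

// Mask -> vector: each element becomes all-ones (bit set) or all-zeros (bit clear).
__m256i expand_mask(__mmask8 k) { return _mm256_movm_epi32(k); }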
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. 
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. 
+
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 << 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 << 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+ + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. 
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. 
+
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 << 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 << 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+ + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". 
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. 
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. 
+
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 << 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 << 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+ENDFOR
+dst[MAX:256] := 0
+ + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. 
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. 
+
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 << 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 << 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+ + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. 
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. 
+
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 << 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 << 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+ + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". 
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. 
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. 
+
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 << 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 << 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+ENDFOR
+dst[MAX:128] := 0
+ + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
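Putting the RANGE encoding to work: imm8[1:0] picks the compare (min/max/absolute min/absolute max) and imm8[3:2] picks the sign source. A sketch assuming the name _mm256_range_pd:

#include <immintrin.h>

// imm8 = 0b0101: opCtl = 01 (max), signSelCtl = 01 (sign from compare result),
// i.e. a plain elementwise maximum under the RANGE definition above.
__m256d range_max(__m256d a, __m256d b) { return _mm256_range_pd(a, b, 0x05); }

// imm8 = 0b1011: opCtl = 11 (absolute max), signSelCtl = 10 (clear sign bit),
// i.e. max(|a|, |b|) with a non-negative result.
__m256d range_absmax(__m256d a, __m256d b) { return _mm256_range_pd(a, b, 0x0B); }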
+ + + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] 
+
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+ + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Miscellaneous +
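The ReduceArgument definition keeps imm8[7:4] fraction bits and subtracts the rounded value from the input, so with zero kept bits and truncation it yields the fractional part. A sketch assuming the name _mm256_reduce_pd:

#include <immintrin.h>

// imm8 = 0x03: keep 0 fraction bits (imm8[7:4] = 0) and round toward zero
// (imm8[3:0] = 3), so dst = a - trunc(a), the signed fractional part.
__m256d frac_part(__m256d a) { return _mm256_reduce_pd(a, 0x03); }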
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
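A sketch of the double-to-64-bit-integer conversions above (rounding follows MXCSR for these non-truncating forms), assuming the names _mm256_cvtpd_epi64 and _mm256_cvtpd_epu64:

#include <immintrin.h>

__m256i pd_to_i64(__m256d a) { return _mm256_cvtpd_epi64(a); } // signed
__m256i pd_to_u64(__m256d a) { return _mm256_cvtpd_epu64(a); } // unsigned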
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
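A minimal sketch of the widening float-to-64-bit conversions above; the names `_mm256_cvtps_epi64` and `_mm256_mask_cvtps_epi64` are assumed, since the table omits the prototypes. Four packed floats (128 bits) widen to four 64-bit integers (256 bits):

#include <immintrin.h>

/* Widen four f32 lanes to four i64 lanes, rounding per the current MXCSR mode. */
__m256i f32_to_i64(__m128 a) {
    return _mm256_cvtps_epi64(a);
}

/* Write-masked form: lanes with a clear bit in k are copied from src. */
__m256i f32_to_i64_merge(__m256i src, __mmask8 k, __m128 a) {
    return _mm256_mask_cvtps_epi64(src, k, a);
}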
+ + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
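The signed 64-bit-to-double direction, sketched with the assumed names `_mm256_cvtepi64_pd` / `_mm256_maskz_cvtepi64_pd`; before AVX512DQ this conversion had no single-instruction vector form.

#include <immintrin.h>

/* i64 -> f64, lane for lane; may round, since f64 carries only 53 mantissa bits. */
__m256d i64_to_f64(__m256i a) {
    return _mm256_cvtepi64_pd(a);
}

/* Zero-masked form: lanes with a clear bit in k become 0.0. */
__m256d i64_to_f64_z(__mmask8 k, __m256i a) {
    return _mm256_maskz_cvtepi64_pd(k, a);
}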
+ + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
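The narrowing direction halves the element width, so a 256-bit source yields a 128-bit result, and a 128-bit source fills only the low 64 bits (hence the `dst[MAX:64] := 0` above). A sketch, assuming the usual name `_mm256_cvtepi64_ps`:

#include <immintrin.h>

/* Four i64 lanes narrow to four f32 lanes; the result is a 128-bit vector. */
__m128 i64_to_f32(__m256i a) {
    return _mm256_cvtepi64_ps(a);
}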
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
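The "with truncation" family above always rounds toward zero, regardless of MXCSR, which matches the semantics of a C cast. A sketch under the assumed names `_mm256_cvttpd_epi64` and `_mm256_cvttpd_epu64`:

#include <immintrin.h>

/* Truncating f64 -> i64: -1.7 becomes -1, exactly like (long long)x in C. */
__m256i f64_to_i64_trunc(__m256d a) {
    return _mm256_cvttpd_epi64(a);
}

/* Unsigned variant; negative inputs are out of range for u64. */
__m256i f64_to_u64_trunc(__m256d a) {
    return _mm256_cvttpd_epu64(a);
}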
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Convert +
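The same truncating behaviour for float sources, again widening 32-bit lanes to 64-bit lanes; the names `_mm_cvttps_epi64` / `_mm_cvttps_epu64` are assumed:

#include <immintrin.h>

/* Two f32 lanes (the low half of a) truncate-convert to two i64 lanes. */
__m128i f32_to_i64_trunc(__m128 a) {
    return _mm_cvttps_epi64(a);
}

/* Unsigned counterpart. */
__m128i f32_to_u64_trunc(__m128 a) {
    return _mm_cvttps_epu64(a);
}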
+ + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". +
+FOR j := 0 to 3
+ i := j*64
+ dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+FOR j := 0 to 3
+ i := j*64
+ IF k[j]
+ dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
+ ELSE
+ dst[i+63:i] := src[i+63:i]
+ FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 to 3
+ i := j*64
+ IF k[j]
+ dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
+ ELSE
+ dst[i+63:i] := 0
+ FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". +
+FOR j := 0 to 1
+ i := j*64
+ dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+FOR j := 0 to 1
+ i := j*64
+ IF k[j]
+ dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
+ ELSE
+ dst[i+63:i] := src[i+63:i]
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 to 1
+ i := j*64
+ IF k[j]
+ dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
+ ELSE
+ dst[i+63:i] := 0
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". +
+FOR j := 0 to 3
+ i := j*64
+ l := j*32
+ dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+FOR j := 0 to 3
+ i := j*64
+ l := j*32
+ IF k[j]
+ dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
+ ELSE
+ dst[l+31:l] := src[l+31:l]
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 to 3
+ i := j*64
+ l := j*32
+ IF k[j]
+ dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
+ ELSE
+ dst[l+31:l] := 0
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". +
+FOR j := 0 to 1
+ i := j*64
+ l := j*32
+ dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+FOR j := 0 to 1
+ i := j*64
+ l := j*32
+ IF k[j]
+ dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
+ ELSE
+ dst[l+31:l] := src[l+31:l]
+ FI
+ENDFOR
+dst[MAX:64] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 to 1
+ i := j*64
+ l := j*32
+ IF k[j]
+ dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
+ ELSE
+ dst[l+31:l] := 0
+ FI
+ENDFOR
+dst[MAX:64] := 0
+
+
+ AVX512DQ
+ AVX512VL
+
immintrin.h
+ Convert +
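Unsigned 64-bit sources get their own conversions because reinterpreting them as signed would mis-convert values with the top bit set. Without AVX512DQ this typically costs a multi-instruction bit-trick sequence; with it, one instruction. Sketch with the assumed name `_mm256_cvtepu64_pd`:

#include <immintrin.h>

/* u64 -> f64: 0xFFFFFFFFFFFFFFFF converts to about 1.8e19, not -1.0. */
__m256d u64_to_f64(__m256i a) {
    return _mm256_cvtepu64_pd(a);
}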
+ + + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ + AVX512VL +
immintrin.h
+ Arithmetic +
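The 64-bit low multiply above is new with AVX512DQ; earlier SSE/AVX only offered 32x32 widening multiplies, so a full 64-bit lane product needed several of those plus shifts and adds. Sketch, assuming the conventional names `_mm256_mullo_epi64` / `_mm256_maskz_mullo_epi64`:

#include <immintrin.h>

/* Low 64 bits of each 64x64 product, i.e. an ordinary wrapping u64/i64 multiply. */
__m256i mul_lo64(__m256i a, __m256i b) {
    return _mm256_mullo_epi64(a, b);
}

/* Zero-masked form: lanes with a clear bit in k are zeroed. */
__m256i mul_lo64_z(__mmask8 k, __m256i a, __m256i b) {
    return _mm256_maskz_mullo_epi64(k, a, b);
}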
+ + + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Logical +
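These bitwise operations on floating-point types exist mainly for sign-bit and mask tricks on FP data without crossing into the integer domain. A classic use of the NOT-then-AND form above, assuming the standard name `_mm512_andnot_pd`:

#include <immintrin.h>

/* Branch-free |x| for eight doubles: (~(-0.0)) AND x clears each lane's sign bit. */
__m512d abs_pd(__m512d x) {
    const __m512d sign_bit = _mm512_set1_pd(-0.0);  /* only bit 63 set per lane */
    return _mm512_andnot_pd(sign_bit, x);
}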
+ + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst". +
+FOR j := 0 to 15
+ i := j*32
+ n := (j % 2)*32
+ dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512DQ
+
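The x2/x8 broadcasts repeat a whole 128- or 256-bit block across the 512-bit register, which is what the `n := (j % 2)*64` style indexing above expresses. Sketch with the assumed name `_mm512_broadcast_f64x2`:

#include <immintrin.h>

/* Repeat a 128-bit pair of doubles four times: {a,b} -> {a,b,a,b,a,b,a,b}. */
__m512d tile_pair(__m128d pair) {
    return _mm512_broadcast_f64x2(pair);
}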
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 2)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 8)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 2)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[255:0] := a[255:0] +1: dst[255:0] := a[511:256] +ESAC +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[1:0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +2: dst[127:0] := a[383:256] +3: dst[127:0] := a[511:384] +ESAC +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[255:0] := a[255:0] +1: dst[255:0] := a[511:256] +ESAC +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[1:0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +2: dst[127:0] := a[383:256] +3: dst[127:0] := a[511:384] +ESAC +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
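For the extracts, `imm8` selects which block to copy out and must be a compile-time constant, since it is encoded in the instruction. A sketch pulling the upper 256-bit half, assuming the name `_mm512_extractf32x8_ps`:

#include <immintrin.h>

/* imm8 = 1 selects bits 511:256, per the CASE table above. */
__m256 upper_half(__m512 v) {
    return _mm512_extractf32x8_ps(v, 1);
}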
+ + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 7 + i := j*64 + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR j := 0 to 15 + i := j*32 + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) +ENDFOR +k[MAX:16] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Test the lower double-precision (64-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k". + [fpclass_note] + k[0] := CheckFPClass_FP64(a[63:0], imm8[7:0]) +k[MAX:1] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Test the lower double-precision (64-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + [fpclass_note] + IF k1[0] + k[0] := CheckFPClass_FP64(a[63:0], imm8[7:0]) +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Test the lower single-precision (32-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k".
 [fpclass_note]
 k[0] := CheckFPClass_FP32(a[31:0], imm8[7:0])
+k[MAX:1] := 0
+
+
+ AVX512DQ
+
immintrin.h
+ Miscellaneous +
+ + + + + + Test the lower single-precision (32-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + [fpclass_note] + IF k1[0] + k[0] := CheckFPClass_FP32(a[31:0], imm8[7:0]) +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
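The `[fpclass_note]` category bits are commonly documented as 0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +Inf, 0x10 -Inf, 0x20 denormal, 0x40 negative finite, 0x80 SNaN; treat that mapping as an assumption here, since the note itself is not expanded in this table. A sketch flagging non-finite lanes, assuming the name `_mm512_fpclass_pd_mask`:

#include <immintrin.h>

/* One mask bit per lane that is NaN or +/-Inf (categories OR together). */
__mmask8 nonfinite_lanes(__m512d v) {
    return _mm512_fpclass_pd_mask(v, 0x01 | 0x80 | 0x08 | 0x10);
}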
+ + + + + + Copy "a" to "dst", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE (imm8[0]) OF +0: dst[255:0] := b[255:0] +1: dst[511:256] := b[255:0] +ESAC +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE imm8[1:0] OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +2: dst[383:256] := b[127:0] +3: dst[511:384] := b[127:0] +ESAC +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE imm8[0] OF +0: dst[255:0] := b[255:0] +1: dst[511:256] := b[255:0] +ESAC +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE imm8[1:0] OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +2: dst[383:256] := b[127:0] +3: dst[511:384] := b[127:0] +ESAC +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
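Insertion is the inverse of extraction: copy the 512-bit source, then overwrite the block selected by `imm8`. Sketch, assuming the name `_mm512_inserti32x8`:

#include <immintrin.h>

/* Replace bits 511:256 of a with b (imm8 = 1 per the CASE table above). */
__m512i set_upper(__m512i a, __m256i b) {
    return _mm512_inserti32x8(a, b, 1);
}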
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a". + +FOR j := 0 to 15 + i := j*32 + IF a[i+31] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := 0xFFFFFFFF + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := 0xFFFFFFFFFFFFFFFF + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a". + +FOR j := 0 to 7 + i := j*64 + IF a[i+63] + k[j] := 1 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
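The move-mask pair converts between a vector of lane sign bits and a mask register, which is handy for mixing mask arithmetic with vector blends. Sketch with the assumed names `_mm512_movepi32_mask` / `_mm512_movm_epi32`:

#include <immintrin.h>

/* MSB of each 32-bit lane -> one mask bit. */
__mmask16 lane_signs(__m512i v) {
    return _mm512_movepi32_mask(v);
}

/* Inverse: each mask bit -> an all-ones or all-zeros 32-bit lane. */
__m512i mask_to_lanes(__mmask16 k) {
    return _mm512_movm_epi32(k);
}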
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
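The six packed double-precision RANGE entries above share the DEFINE RANGE helper; only the masking and the [sae_note] variants differ. The imm8 encoding is (sign control << 2) | operation control. A hedged C sketch, assuming the usual names _mm512_range_pd/_mm512_mask_range_pd:

```
#include <immintrin.h>

/* 0x0B = 0b1011: absolute max (imm8[1:0] = 11) with the sign bit cleared
   (imm8[3:2] = 10), i.e. the larger magnitude, always non-negative. */
__m512d absmax_pd(__m512d a, __m512d b) {
    return _mm512_range_pd(a, b, 0x0B);
}

/* Write-masked form: lanes with a clear mask bit are copied from src. */
__m512d absmax_pd_masked(__m512d src, __mmask8 k, __m512d a, __m512d b) {
    return _mm512_mask_range_pd(src, k, a, b, 0x0B);
}
```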
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +&#013;
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +&#013;
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +&#013;
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +&#013;
immintrin.h
+ Miscellaneous +
+ + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +&#013;
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +&#013;
immintrin.h
+ Miscellaneous +
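The single-precision RANGE entries mirror the double-precision ones over 16 lanes (note the fix above: the sign-from-compare case assigns tmp[31:0], not a 64-bit span). A one-function sketch, assuming the usual name _mm512_range_ps:

```
#include <immintrin.h>

/* 0x05 = 0b0101: max (imm8[1:0] = 01) with the sign taken from the
   compare result (imm8[3:2] = 01). */
__m512 range_max_ps(__m512 a, __m512 b) {
    return _mm512_range_ps(a, b, 0x05);
}
```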
+ + + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] + 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] + 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] + 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) + 1: dst[63:0] := tmp[63:0] + 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) + 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) + ESAC + + RETURN dst +} +dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +IF k[0] + dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. + imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] + +DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { + CASE opCtl[1:0] OF + 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] + 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] + 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] + 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] + ESAC + + CASE signSelCtl[1:0] OF + 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) + 1: dst[31:0] := tmp[31:0] + 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) + 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) + ESAC + + RETURN dst +} +dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
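The scalar RANGE entries above compute only the low element and pass the upper bits of "a" through. A sketch assuming the usual scalar names _mm_range_sd/_mm_range_ss:

```
#include <immintrin.h>

/* imm8 = 0x00: min, sign taken from a. Only the low lane is computed;
   dst[127:64] (or dst[127:32] for the ss form) is copied from a. */
__m128d range_min_sd(__m128d a, __m128d b) {
    return _mm_range_sd(a, b, 0x00);
}

__m128 range_min_ss(__m128 a, __m128 b) {
    return _mm_range_ss(a, b, 0x00);
}
```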
+ + + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
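In the REDUCE entries above, imm8[7:4] is the number of fraction bits to preserve and imm8[3:0] is the rounding control, so the result is the residue a - ROUND(a) at that granularity. A sketch assuming the usual name _mm512_reduce_pd (not to be confused with the _mm512_reduce_add_* horizontal reductions):

```
#include <immintrin.h>

/* imm8 = 0x03: keep 0 fraction bits, truncate toward zero. The residue
   a - trunc(a) is the (signed) fractional part of each lane. */
__m512d fractional_part_pd(__m512d a) {
    return _mm512_reduce_pd(a, 0x03);
}
```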
+ + + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
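The single-precision form is identical per 32-bit lane. A sketch, assuming the name _mm512_reduce_ps:

```
#include <immintrin.h>

/* imm8 = 0x40: preserve 4 fraction bits, round to nearest even. The
   result is each lane's residue relative to the nearest multiple of 2^-4. */
__m512 residue_16th_ps(__m512 a) {
    return _mm512_reduce_ps(a, 0x40);
}
```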
+ + + + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + tmp[63:0] := src1[63:0] - tmp[63:0] + IF IsInf(tmp[63:0]) + tmp[63:0] := FP64(0.0) + FI + RETURN tmp[63:0] +} +dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + tmp[31:0] := src1[31:0] - tmp[31:0] + IF IsInf(tmp[31:0]) + tmp[31:0] := FP32(0.0) + FI + RETURN tmp[31:0] +} +dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512DQ +
immintrin.h
+ Miscellaneous +
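The scalar REDUCE entries reduce only the low element of "b" and take the upper bits from "a". A sketch with the assumed names _mm_reduce_sd/_mm_reduce_ss:

```
#include <immintrin.h>

/* Fractional part of b's low lane (truncation, 0 fraction bits kept);
   the upper bits of dst are copied from a. */
__m128d fractional_part_sd(__m128d a, __m128d b) {
    return _mm_reduce_sd(a, b, 0x03);
}

__m128 fractional_part_ss(__m128 a, __m128 b) {
    return _mm_reduce_ss(a, b, 0x03);
}
```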
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
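The entries above are the AVX512DQ direct FP64-to-64-bit-integer paths; the [round_note] variants take an explicit rounding mode, while the plain forms round per MXCSR. A sketch with the assumed names _mm512_cvtpd_epi64/_mm512_cvtpd_epu64:

```
#include <immintrin.h>

__m512i f64_to_i64(__m512d a) {
    return _mm512_cvtpd_epi64(a);   /* signed, rounding per MXCSR */
}

__m512i f64_to_u64(__m512d a) {
    return _mm512_cvtpd_epu64(a);   /* unsigned */
}
```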
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
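The unsigned counterparts above (presumably _mm512_cvtps_epu64 and its mask/maskz/round forms) are used identically; the only semantic difference is the unsigned destination range, so negative inputs are out of range for the conversion. A hedged fragment:

__m256  a = _mm256_set1_ps(3.5f);
__m512i u = _mm512_cvtps_epu64(a);  /* 4 per lane under the default round-to-nearest-even mode */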
+ + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
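Going the other direction, the signed 64-bit to FP64 entries above (likely _mm512_cvtepi64_pd) are exact only while the integer fits in the 53-bit significand of a double; beyond that the stated rounding applies. A sketch:

__m512i exact   = _mm512_set1_epi64(1LL << 53);        /* representable exactly */
__m512i inexact = _mm512_set1_epi64((1LL << 53) + 1);  /* rounds to the nearest double */
__m512d d0 = _mm512_cvtepi64_pd(exact);
__m512d d1 = _mm512_cvtepi64_pd(inexact);              /* compares equal to d0 after rounding */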
+ + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
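The FP32-producing variant narrows eight 64-bit lanes into a 256-bit result, which is why the pseudocode above zeroes dst[MAX:256] rather than dst[MAX:512]. Assuming the name _mm512_cvtepi64_ps:

__m512i v = _mm512_set1_epi64(16777217LL);  /* 2^24 + 1: one bit too precise for FP32 */
__m256  f = _mm512_cvtepi64_ps(v);          /* each lane rounds to 16777216.0f */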
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
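The _Truncate pseudocode marks the difference from the rounding conversions earlier in this section: truncation always chops toward zero, independent of the MXCSR rounding mode. A sketch, assuming _mm512_cvttpd_epi64:

__m512d a = _mm512_set1_pd(-2.7);
__m512i t = _mm512_cvttpd_epi64(a);  /* -2 per lane: truncated toward zero, not rounded to -3 */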
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
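The remaining truncating entries (FP64 and FP32 sources into signed or unsigned 64-bit destinations, presumably _mm512_cvttpd_epu64, _mm512_cvttps_epi64 and _mm512_cvttps_epu64) combine the behaviors already shown: widening from FP32 where applicable, plus truncation toward zero:

__m256  f = _mm256_set1_ps(7.9f);
__m512i s = _mm512_cvttps_epi64(f);  /* 7 per lane */
__m512i u = _mm512_cvttps_epu64(f);  /* 7 per lane, as unsigned 64-bit */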
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
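For the unsigned-source conversions just described (likely _mm512_cvtepu64_pd; the FP32-producing forms follow below), the full 64-bit unsigned range is accepted, including values a signed conversion would misinterpret:

__m512i big = _mm512_set1_epi64((long long)0x8000000000000000ULL); /* 2^63 as a bit pattern */
__m512d d   = _mm512_cvtepu64_pd(big);  /* 9.223372036854775808e18, not a negative value */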
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512DQ +
immintrin.h
+ Convert +
+ + + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp[127:0] := a[i+63:i] * b[i+63:i] + dst[i+63:i] := tmp[63:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512DQ +
immintrin.h
+ Arithmetic +
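The three entries above give a native 64-bit low multiply (presumably _mm512_mullo_epi64), which AVX512F lacks; only the low 64 bits of each 128-bit product are kept:

__m512i a = _mm512_set1_epi64(0x100000001LL);
__m512i b = _mm512_set1_epi64(3);
__m512i p = _mm512_mullo_epi64(a, b);  /* 0x300000003 per lane; the high 64 bits
                                          of the 128-bit product are discarded */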
+ + + + + Add 8-bit masks in "a" and "b", and store the result in "k". + +k[7:0] := a[7:0] + b[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Add 16-bit masks in "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] + b[15:0] +k[MAX:16] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 8-bit masks "a" and "b", and store the result in "k". + +k[7:0] := a[7:0] AND b[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 8-bit masks "a" and then AND with "b", and store the result in "k". + +k[7:0] := (NOT a[7:0]) AND b[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + Compute the bitwise NOT of 8-bit mask "a", and store the result in "k". + +k[7:0] := NOT a[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 8-bit masks "a" and "b", and store the result in "k". + +k[7:0] := a[7:0] OR b[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XNOR of 8-bit masks "a" and "b", and store the result in "k". + +k[7:0] := NOT (a[7:0] XOR b[7:0]) +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XOR of 8-bit masks "a" and "b", and store the result in "k". + +k[7:0] := a[7:0] XOR b[7:0] +k[MAX:8] := 0 + + + AVX512DQ +
immintrin.h
+ Mask +
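These 8- and 16-bit mask operations act directly on __mmask8/__mmask16 values; assuming the conventional names _kadd_mask8, _kand_mask8, _kandn_mask8, _knot_mask8, _kor_mask8, _kxnor_mask8 and _kxor_mask8:

__mmask8 a = 0xC3, b = 0x0F;
__mmask8 sum  = _kadd_mask8(a, b);   /* 0xD2: plain 8-bit addition, any carry out is lost */
__mmask8 and_ = _kand_mask8(a, b);   /* 0x03 */
__mmask8 andn = _kandn_mask8(a, b);  /* (~a) & b = 0x0C */
__mmask8 not_ = _knot_mask8(a);      /* 0x3C */
__mmask8 xor_ = _kxor_mask8(a, b);   /* 0xCC */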
+ + + + + Shift the bits of 8-bit mask "a" left by "count" while shifting in zeros, and store the least significant 8 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 7 + k[7:0] := a[7:0] << count[7:0] +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 8-bit mask "a" right by "count" while shifting in zeros, and store the least significant 8 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 7 + k[7:0] := a[7:0] >> count[7:0] +FI + + + AVX512DQ +
immintrin.h
+ Mask +
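Per the IF guard in the pseudocode, any shift count above 7 produces an all-zero mask. Assuming _kshiftli_mask8/_kshiftri_mask8 (the count is an immediate):

__mmask8 m  = 0x81;
__mmask8 sl = _kshiftli_mask8(m, 1);  /* 0x02: bit 7 shifted out */
__mmask8 sr = _kshiftri_mask8(m, 4);  /* 0x08 */
__mmask8 z  = _kshiftli_mask8(m, 9);  /* 0x00: count > 7 */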
+ + + + + + Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". + +tmp[7:0] := a[7:0] OR b[7:0] +IF tmp[7:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +IF tmp[7:0] == 0xFF + MEM[all_ones+7:all_ones] := 1 +ELSE + MEM[all_ones+7:all_ones] := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[7:0] := a[7:0] OR b[7:0] +IF tmp[7:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". + +tmp[7:0] := a[7:0] OR b[7:0] +IF tmp[7:0] == 0xFF + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
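The OR-and-test entries return flag-like scalars instead of a mask; the three-operand form reports both the all-zeros and the all-ones outcome in one call. A sketch, assuming _kortest_mask8_u8, _kortestz_mask8_u8 and _kortestc_mask8_u8:

__mmask8 a = 0xF0, b = 0x0F;
unsigned char all_ones;
unsigned char is_zero = _kortest_mask8_u8(a, b, &all_ones);  /* is_zero = 0, all_ones = 1 */
unsigned char z = _kortestz_mask8_u8(a, a);  /* 0: a | a is not all zeros */
unsigned char c = _kortestc_mask8_u8(a, b);  /* 1: a | b is all ones */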
+ + + + + + Compute the bitwise AND of 8-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". + +tmp1[7:0] := a[7:0] AND b[7:0] +IF tmp1[7:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +tmp2[7:0] := (NOT a[7:0]) AND b[7:0] +IF tmp2[7:0] == 0x0 + MEM[and_not+7:and_not] := 1 +ELSE + MEM[and_not+7:and_not] := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 8-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". + +tmp[7:0] := a[7:0] AND b[7:0] +IF tmp[7:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 8-bit mask "a" and then AND with "b", if the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[7:0] := (NOT a[7:0]) AND b[7:0] +IF tmp[7:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + + Compute the bitwise AND of 16-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". + +tmp1[15:0] := a[15:0] AND b[15:0] +IF tmp1[15:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +tmp2[15:0] := (NOT a[15:0]) AND b[15:0] +IF tmp2[15:0] == 0x0 + MEM[and_not+7:and_not] := 1 +ELSE + MEM[and_not+7:and_not] := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 16-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". + +tmp[15:0] := a[15:0] AND b[15:0] +IF tmp[15:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 16-bit mask "a" and then AND with "b", if the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[15:0] := (NOT a[15:0]) AND b[15:0] +IF tmp[15:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512DQ +
immintrin.h
+ Mask +
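The AND-based tests mirror the kortest family but report (a AND b) == 0 and ((NOT a) AND b) == 0 instead; assuming _ktest_mask8_u8, _ktestz_mask8_u8 and _ktestc_mask8_u8 (with 16-bit analogues):

__mmask8 a = 0x0F, b = 0xF0;
unsigned char and_not;
unsigned char zf = _ktest_mask8_u8(a, b, &and_not);  /* zf = 1 since a & b == 0;
                                                        and_not = 0 since (~a) & b == 0xF0 */
unsigned char cf = _ktestc_mask8_u8(0xFF, b);        /* 1: (~0xFF) & b == 0 */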
+ + + + Convert 8-bit mask "a" into an integer value, and store the result in "dst". + +dst := ZeroExtend32(a[7:0]) + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + Convert integer value "a" into an 8-bit mask, and store the result in "k". + +k := a[7:0] + + + AVX512DQ +
immintrin.h
+ Mask +
+ + + + Load 8-bit mask from memory into "k". + +k[7:0] := MEM[mem_addr+7:mem_addr] + + + AVX512DQ +
immintrin.h
+ Load +
+ + + + + Store 8-bit mask from "a" into memory. + +MEM[mem_addr+7:mem_addr] := a[7:0] + + + AVX512DQ +
immintrin.h
+ Store +
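Round-tripping masks through integers and memory, assuming the names _cvtmask8_u32, _cvtu32_mask8, _load_mask8 and _store_mask8:

__mmask8 m = _cvtu32_mask8(0xA5u);       /* integer -> mask */
unsigned int back = _cvtmask8_u32(m);    /* 0xA5, zero-extended to 32 bits */

__mmask8 slot;
_store_mask8(&slot, m);                  /* mask -> memory */
__mmask8 reloaded = _load_mask8(&slot);  /* memory -> mask */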
+ + + + + + Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ACOS(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ACOS(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ACOS(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ACOS(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
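From this point the entries carry an AVX512F flag but no instruction: they describe SVML vector-math functions, which a compiler or math library (for example ICC/ICX with SVML) expands into code rather than a single opcode. A hedged sketch assuming the conventional name _mm512_acos_pd:

__m512d x = _mm512_set1_pd(0.5);
__m512d r = _mm512_acos_pd(x);  /* ~1.0471975512 (pi/3) per lane; a library call,
                                   not a hardware instruction */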
+ + + + Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ACOSH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ACOSH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ACOSH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ACOSH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ASIN(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ASIN(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ASIN(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ASIN(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ASINH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ASINH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ASINH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ASINH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ATAN(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ATAN(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ATAN(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ATAN(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" and store the results in "dst" expressed in radians. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ATANH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ATANH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ATANH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ATANH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := COS(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := COS(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := COSD(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := COSD(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := COSD(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := COSD(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := COSH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := COSH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := COSH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := COSH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SIN(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SIN(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SINH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SINH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SINH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SINH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SIND(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SIND(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SIND(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SIND(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := TAN(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := TAN(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := TAN(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := TAN(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := TAND(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := TAND(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := TAND(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := TAND(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := TANH(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := TANH(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := TANH(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := TANH(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) + MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + + + Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", store the cosine into memory at "mem_addr". Elements are written to their respective locations using writemask "k" (elements are copied from "sin_src" or "cos_src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SIN(a[i+63:i]) + MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i]) + ELSE + dst[i+63:i] := sin_src[i+63:i] + MEM[mem_addr+i+63:mem_addr+i] := cos_src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
+ + + + + + + + Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", store the cosine into memory at "mem_addr". Elements are written to their respective locations using writemask "k" (elements are copied from "sin_src" or "cos_src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SIN(a[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i]) + ELSE + dst[i+31:i] := sin_src[i+31:i] + MEM[mem_addr+i+31:mem_addr+i] := cos_src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Trigonometry +
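The sincos entries return the sine as the normal result and write the cosine through a pointer, which is why the pseudocode refers to both dst and mem_addr. A fragment assuming the SVML signature __m512d _mm512_sincos_pd(__m512d *cos_res, __m512d a):

__m512d angles = _mm512_set1_pd(0.0);
__m512d coss;
__m512d sins = _mm512_sincos_pd(&coss, angles);  /* sins = 0.0, coss = 1.0 per lane */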
+ + + + Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := CubeRoot(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := CubeRoot(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := CubeRoot(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := CubeRoot(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POW(10.0, a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POW(10.0, a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POW(FP32(10.0), a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POW(FP32(10.0), a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POW(2.0, a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POW(2.0, a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POW(FP32(2.0), a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POW(FP32(2.0), a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POW(e, a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POW(FP32(e), a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) - 1.0 +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POW(e, a[i+63:i]) - 1.0 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0 +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0)) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
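The hypotenuse entries compute sqrt(a^2 + b^2) per lane; a sketch assuming the SVML name _mm512_hypot_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    // Classic 3-4-5 triangle in every lane.
    __m512d a = _mm512_set1_pd(3.0);
    __m512d b = _mm512_set1_pd(4.0);
    __m512d h = _mm512_hypot_pd(a, b);  // assumed SVML name: sqrt(a^2 + b^2)

    double out[8];
    _mm512_storeu_pd(out, h);
    printf("hypot(3,4) = %f\n", out[0]);  // 5.0
    return 0;
}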
+ + + + + + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0)) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0)) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0)) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := InvSQRT(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
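The InvSQRT entries are full-precision 1/sqrt(a), distinct from the hardware approximation _mm512_rsqrt14_pd (which is only accurate to about 2^-14). A sketch assuming the SVML name _mm512_invsqrt_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(4.0);
    __m512d r = _mm512_invsqrt_pd(a);  // assumed SVML name: full-precision 1/sqrt(a)

    double out[8];
    _mm512_storeu_pd(out, r);
    printf("invsqrt(4) = %f\n", out[0]);  // 0.5
    return 0;
}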
+ + + + + + Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := InvSQRT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := InvSQRT(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := InvSQRT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LOG(1.0 + a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LOG(1.0 + a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LOG(1.0 + a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LOG(1.0 + a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
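log1p exists because LOG(1.0 + x) loses all precision when x is tiny; the dedicated routine does not. A sketch of the masked single-precision form, assuming the SVML name _mm512_mask_log1p_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    // For x = 1e-7f, (1.0f + x) rounds so that log() would return ~0.
    __m512 x   = _mm512_set1_ps(1e-7f);
    __m512 src = _mm512_setzero_ps();
    __mmask16 k = 0xFFFF;

    __m512 r = _mm512_mask_log1p_ps(src, k, x);  // assumed SVML name

    float out[16];
    _mm512_storeu_ps(out, r);
    printf("log1p(1e-7) = %e\n", out[0]);  // ~1e-7, not 0
    return 0;
}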
+ + + + Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LOG(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LOG(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
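Unlike the SVML entries around it, the ConvertExpFP64 operation maps to the AVX-512F hardware instruction VGETEXPPD, exposed as _mm512_getexp_pd / _mm512_mask_getexp_pd. A minimal sketch of the masked form:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a   = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 0.5, 10.0, 100.0, 1000.0);
    __m512d src = _mm512_set1_pd(-99.0);
    __mmask8 k  = 0x0F;  // only the low four lanes

    // floor(log2(x)) returned as a double; masked-off lanes keep src (-99).
    __m512d e = _mm512_mask_getexp_pd(src, k, a);

    double out[8];
    _mm512_storeu_pd(out, e);
    for (int j = 0; j < 8; j++)
        printf("%g ", out[j]);  // 0 1 1 2 -99 -99 -99 -99
    printf("\n");
    return 0;
}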
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised to the power of packed elements in "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POW(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised to the power of packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POW(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised to the power of packed elements in "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POW(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised to the power of packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POW(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
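A sketch of the masked element-wise power entry, assuming the SVML name _mm512_mask_pow_ps with the (src, k, a, b) argument order:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 a   = _mm512_set1_ps(2.0f);
    __m512 b   = _mm512_set1_ps(10.0f);
    __m512 src = _mm512_set1_ps(0.0f);
    __mmask16 k = 0x0001;              // compute lane 0 only

    __m512 r = _mm512_mask_pow_ps(src, k, a, b);  // assumed SVML name: a^b per lane

    float out[16];
    _mm512_storeu_ps(out, r);
    printf("2^10 = %f, lane1 = %f\n", out[0], out[1]);  // 1024.0 and 0.0
    return 0;
}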
+ + + + Computes the reciprocal of packed double-precision (64-bit) floating-point elements in "a", storing the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (1.0 / a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Computes the reciprocal of packed double-precision (64-bit) floating-point elements in "a", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Computes the reciprocal of packed single-precision (32-bit) floating-point elements in "a", storing the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (1.0 / a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Computes the reciprocal of packed single-precision (32-bit) floating-point elements in "a", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := CDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := CDFNormal(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := CDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := CDFNormal(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
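CDFNormal is the standard normal cumulative distribution function Phi(x); a quick sanity check is Phi(0) = 0.5. A sketch assuming the SVML name _mm512_cdfnorm_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 x = _mm512_setzero_ps();
    __m512 p = _mm512_cdfnorm_ps(x);   // assumed SVML name: standard normal CDF

    float out[16];
    _mm512_storeu_ps(out, p);
    printf("Phi(0) = %f\n", out[0]);   // 0.5
    return 0;
}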
+ + + + Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := InverseCDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := InverseCDFNormal(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := InverseCDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := InverseCDFNormal(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ERF(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ERF(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := 1.0 - ERF(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := 1.0 - ERF(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ERF(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
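A sketch for the single-precision error-function entry, assuming the SVML name _mm512_erf_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 x = _mm512_set1_ps(1.0f);
    __m512 e = _mm512_erf_ps(x);       // assumed SVML name

    float out[16];
    _mm512_storeu_ps(out, e);
    printf("erf(1) = %f\n", out[0]);   // ~0.842701
    return 0;
}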
+ + + + + + Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ERF(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := 1.0 - ERF(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := 1.0 - ERF(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := 1.0 / ERF(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := 1.0 / ERF(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := 1.0 / ERF(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := 1.0 / ERF(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i])) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i])) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i])) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + + + Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i])) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Probability/Statistics +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := CEIL(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := CEIL(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := CEIL(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := CEIL(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
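A sketch of the masked ceiling entry, assuming the name _mm512_mask_ceil_ps with the (src, k, a) argument order:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 a   = _mm512_set1_ps(1.25f);
    __m512 src = _mm512_set1_ps(0.0f);
    __mmask16 k = 0xAAAA;              // odd lanes only

    __m512 r = _mm512_mask_ceil_ps(src, k, a);  // assumed name: per-lane CEIL

    float out[16];
    _mm512_storeu_ps(out, r);
    printf("lane0=%f lane1=%f\n", out[0], out[1]);  // 0.0 (masked) and 2.0
    return 0;
}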
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := FLOOR(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := FLOOR(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := FLOOR(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := FLOOR(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Rounds each packed double-precision (64-bit) floating-point element in "a" to the nearest integer value and stores the results as packed double-precision floating-point elements in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := NearbyInt(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Rounds each packed double-precision (64-bit) floating-point element in "a" to the nearest integer value and stores the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := NearbyInt(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Rounds each packed single-precision (32-bit) floating-point element in "a" to the nearest integer value and stores the results as packed single-precision floating-point elements in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := NearbyInt(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Rounds each packed single-precision (32-bit) floating-point element in "a" to the nearest integer value and stores the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := NearbyInt(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Rounds the packed double-precision (64-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RoundToNearestEven(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Rounds the packed double-precision (64-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RoundToNearestEven(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Rounds the packed single-precision (32-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RoundToNearestEven(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Rounds the packed single-precision (32-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RoundToNearestEven(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ROUND(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ROUND(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
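A sketch of the masked round entry, assuming the SVML name _mm512_mask_svml_round_pd. The ROUND pseudo-op is presumably C round() semantics (halfway cases away from zero, unlike the RoundToNearestEven entries above); that interpretation is an assumption, since this file does not spell it out.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a   = _mm512_setr_pd(0.5, 1.5, 2.5, -0.5, -1.5, 3.2, 3.8, -3.2);
    __m512d src = _mm512_setzero_pd();
    __mmask8 k  = 0xFF;

    __m512d r = _mm512_mask_svml_round_pd(src, k, a);  // assumed SVML name

    double out[8];
    _mm512_storeu_pd(out, r);
    for (int j = 0; j < 8; j++)
        printf("%g ", out[j]);  // 1 2 3 -1 -2 3 4 -3, if ties round away from zero
    printf("\n");
    return 0;
}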
+ + + + Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := TRUNCATE(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := TRUNCATE(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := TRUNCATE(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := TRUNCATE(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
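Note that in the masked pseudocode the divide-by-zero check (#DE) sits inside IF k[j], so masked-off lanes cannot fault. A sketch assuming the SVML name _mm512_mask_div_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a   = _mm512_set1_epi32(7);
    __m512i b   = _mm512_set1_epi32(2);
    __m512i src = _mm512_set1_epi32(-1);
    __mmask16 k = 0x000F;  // low four lanes

    // Assumed SVML name; truncated quotient, like C's integer '/'.
    __m512i q = _mm512_mask_div_epi32(src, k, a, b);

    int out[16];
    _mm512_storeu_si512(out, q);
    printf("lane0=%d lane4=%d\n", out[0], out[4]);  // 3 and -1
    return 0;
}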
+ + + + + Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 63 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 31 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
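A sketch of the masked remainder entry, assuming the SVML name _mm512_mask_rem_epi32; the REMAINDER pseudo-op presumably matches C's '%' (sign follows the dividend), which is an assumption:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a   = _mm512_set1_epi32(7);
    __m512i b   = _mm512_set1_epi32(3);
    __m512i src = _mm512_setzero_si512();
    __mmask16 k = 0xFFFF;

    __m512i r = _mm512_mask_rem_epi32(src, k, a, b);  // assumed SVML name

    int out[16];
    _mm512_storeu_si512(out, r);
    printf("7 %% 3 = %d\n", out[0]);  // 1
    return 0;
}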
+ + + + + Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 8-bit integers in "dst". + FOR j := 0 to 63 + i := 8*j + dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 16-bit integers in "dst". + FOR j := 0 to 31 + i := 16*j + dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 64-bit integers in "dst". + FOR j := 0 to 7 + i := 64*j + dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 63 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 31 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 8-bit integers in "dst". + FOR j := 0 to 63 + i := 8*j + dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 16-bit integers in "dst". + FOR j := 0 to 31 + i := 16*j + dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 64-bit integers in "dst". + FOR j := 0 to 7 + i := 64*j + dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
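The AVX512VL entries above are the documented merge-masked and zero-masked forms _mm256_mask_add_pd and _mm256_maskz_add_pd. A minimal sketch contrasting the two masking styles:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a   = _mm256_set1_pd(1.0);
    __m256d b   = _mm256_set1_pd(2.0);
    __m256d src = _mm256_set1_pd(-1.0);
    __mmask8 k  = 0x5;  // lanes 0 and 2

    __m256d m = _mm256_mask_add_pd(src, k, a, b);   // merge: masked lanes keep src
    __m256d z = _mm256_maskz_add_pd(k, a, b);       // zero:  masked lanes become 0

    double mo[4], zo[4];
    _mm256_storeu_pd(mo, m);
    _mm256_storeu_pd(zo, z);
    printf("merge: %g %g %g %g\n", mo[0], mo[1], mo[2], mo[3]);  // 3 -1 3 -1
    printf("zero:  %g %g %g %g\n", zo[0], zo[1], zo[2], zo[3]);  // 3 0 3 0
    return 0;
}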
+ + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
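The fused multiply-add entries above come in three masked flavors that differ only in what the masked-off lanes receive: the documented AVX512VL intrinsics _mm256_mask_fmadd_ps (keep "a"), _mm256_mask3_fmadd_ps (keep "c"), and _mm256_maskz_fmadd_ps (zero). A sketch showing all three side by side:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_set1_ps(2.0f);
    __m256 b = _mm256_set1_ps(3.0f);
    __m256 c = _mm256_set1_ps(1.0f);
    __mmask8 k = 0x0F;  // low four lanes compute a*b+c = 7

    __m256 r1 = _mm256_mask_fmadd_ps(a, k, b, c);   // masked lanes keep a (2)
    __m256 r2 = _mm256_mask3_fmadd_ps(a, b, c, k);  // masked lanes keep c (1)
    __m256 r3 = _mm256_maskz_fmadd_ps(k, a, b, c);  // masked lanes zeroed

    float o1[8], o2[8], o3[8];
    _mm256_storeu_ps(o1, r1);
    _mm256_storeu_ps(o2, r2);
    _mm256_storeu_ps(o3, r3);
    printf("mask:  lane0=%g lane7=%g\n", o1[0], o1[7]);  // 7 and 2
    printf("mask3: lane0=%g lane7=%g\n", o2[0], o2[7]);  // 7 and 1
    printf("maskz: lane0=%g lane7=%g\n", o3[0], o3[7]);  // 7 and 0
    return 0;
}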
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
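The fmaddsub entries above alternate per lane index: even lanes compute a*b - c, odd lanes a*b + c, per the (j & 1) test in the pseudocode. A minimal C sketch; the name _mm256_mask_fmaddsub_pd is an assumption matched to the merge-from-"a" variant described above.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed name: _mm256_mask_fmaddsub_pd -- subtracts "c" in even lanes,
     * adds it in odd lanes, merging from "a" on masked-off lanes.
     * Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256d a = _mm256_set1_pd(2.0);
        __m256d b = _mm256_set1_pd(3.0);
        __m256d c = _mm256_set1_pd(1.0);
        __mmask8 k = 0x7;                /* lanes 0..2 active, lane 3 keeps a */

        __m256d r = _mm256_mask_fmaddsub_pd(a, k, b, c);

        double out[4];
        _mm256_storeu_pd(out, r);
        printf("%.1f %.1f %.1f %.1f\n",  /* 5.0 7.0 5.0 2.0 */
               out[0], out[1], out[2], out[3]);
        return 0;
    }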
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
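The fmsub entries above compute a*b - c under the same three mask conventions. A minimal C sketch of the zeroing variant; the name _mm256_maskz_fmsub_ps is an assumption matched to the zeromask pseudocode above.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed name: _mm256_maskz_fmsub_ps (zeroing a*b - c).
     * Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256 a = _mm256_set1_ps(4.0f);
        __m256 b = _mm256_set1_ps(2.0f);
        __m256 c = _mm256_set1_ps(3.0f);
        __mmask8 k = 0x81;               /* only lanes 0 and 7 active */

        __m256 r = _mm256_maskz_fmsub_ps(k, a, b, c);

        float out[8];
        _mm256_storeu_ps(out, r);
        printf("%.1f %.1f %.1f\n", out[0], out[1], out[7]); /* 5.0 0.0 5.0 */
        return 0;
    }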
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
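The fmsubadd entries above are the mirror image of fmaddsub: even lanes compute a*b + c, odd lanes a*b - c. A minimal C sketch; the name _mm256_mask_fmsubadd_pd is an assumption matched to the merge-from-"a" variant.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed name: _mm256_mask_fmsubadd_pd -- adds "c" in even lanes,
     * subtracts it in odd lanes, merging from "a" on masked-off lanes.
     * Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256d a = _mm256_set1_pd(2.0);
        __m256d b = _mm256_set1_pd(3.0);
        __m256d c = _mm256_set1_pd(1.0);
        __mmask8 k = 0xE;                /* lanes 1..3 active, lane 0 keeps a */

        __m256d r = _mm256_mask_fmsubadd_pd(a, k, b, c);

        double out[4];
        _mm256_storeu_pd(out, r);
        printf("%.1f %.1f %.1f %.1f\n",  /* 2.0 5.0 7.0 5.0 */
               out[0], out[1], out[2], out[3]);
        return 0;
    }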
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
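The fnmadd entries above negate the product before the add: dst := -(a*b) + c. A minimal C sketch; the name _mm256_mask_fnmadd_ps is an assumption matched to the merge-from-"a" pseudocode.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed name: _mm256_mask_fnmadd_ps (-(a*b) + c, merge from "a").
     * Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256 a = _mm256_set1_ps(2.0f);
        __m256 b = _mm256_set1_ps(3.0f);
        __m256 c = _mm256_set1_ps(10.0f);
        __mmask8 k = 0x3F;               /* lanes 0..5 active */

        __m256 r = _mm256_mask_fnmadd_ps(a, k, b, c);

        float out[8];
        _mm256_storeu_ps(out, r);
        printf("%.1f %.1f\n", out[0], out[6]); /* 4.0 (= -6+10) and 2.0 */
        return 0;
    }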
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
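The fnmsub entries above negate the product and subtract: dst := -(a*b) - c. A minimal C sketch of the zeroing variant; the name _mm256_maskz_fnmsub_pd is an assumption matched to the zeromask pseudocode.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed name: _mm256_maskz_fnmsub_pd (-(a*b) - c, zeroing).
     * Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256d a = _mm256_set1_pd(2.0);
        __m256d b = _mm256_set1_pd(3.0);
        __m256d c = _mm256_set1_pd(1.0);
        __mmask8 k = 0x9;                 /* lanes 0 and 3 active */

        __m256d r = _mm256_maskz_fnmsub_pd(k, a, b, c);

        double out[4];
        _mm256_storeu_pd(out, r);
        printf("%.1f %.1f %.1f %.1f\n",   /* -7.0 0.0 0.0 -7.0 */
               out[0], out[1], out[2], out[3]);
        return 0;
    }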
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
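The entries above add writemask/zeromask forms of packed floating-point MAX; note the [max_float_note] caveat each entry carries about NaN and signed-zero handling. A minimal C sketch; the name _mm256_mask_max_pd is an assumption matched to the merge-from-"src" pseudocode.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed name: _mm256_mask_max_pd (per-lane MAX, merge from "src").
     * Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256d src = _mm256_set1_pd(-1.0);
        __m256d a   = _mm256_setr_pd(1.0, 5.0, 3.0, 7.0);
        __m256d b   = _mm256_setr_pd(4.0, 2.0, 6.0, 0.0);
        __mmask8 k  = 0x5;               /* lanes 0 and 2 active */

        __m256d r = _mm256_mask_max_pd(src, k, a, b);

        double out[4];
        _mm256_storeu_pd(out, r);
        printf("%.1f %.1f %.1f %.1f\n",  /* 4.0 -1.0 6.0 -1.0 */
               out[0], out[1], out[2], out[3]);
        return 0;
    }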
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
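The MIN entries above mirror the MAX family, including the [min_float_note] caveat. A minimal C sketch; the name _mm256_mask_min_ps is an assumption matched to the merge-from-"src" pseudocode.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed name: _mm256_mask_min_ps (per-lane MIN, merge from "src").
     * Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256 src = _mm256_set1_ps(99.0f);
        __m256 a   = _mm256_set1_ps(2.0f);
        __m256 b   = _mm256_set1_ps(-3.0f);
        __mmask8 k = 0xF0;               /* lanes 4..7 active */

        __m256 r = _mm256_mask_min_ps(src, k, a, b);

        float out[8];
        _mm256_storeu_ps(out, r);
        printf("%.1f %.1f\n", out[0], out[4]); /* 99.0 and -3.0 */
        return 0;
    }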
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
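The entries above add masked forms of packed floating-point multiply. A minimal C sketch of both mask behaviours; the names _mm256_mask_mul_pd and _mm256_maskz_mul_pd are assumptions matched to the pseudocode.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed names: _mm256_mask_mul_pd (merge from "src") and
     * _mm256_maskz_mul_pd (zeroing). Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256d src = _mm256_set1_pd(0.5);
        __m256d a   = _mm256_set1_pd(3.0);
        __m256d b   = _mm256_set1_pd(4.0);
        __mmask8 k  = 0x3;               /* lanes 0 and 1 active */

        __m256d merged = _mm256_mask_mul_pd(src, k, a, b);
        __m256d zeroed = _mm256_maskz_mul_pd(k, a, b);

        double m[4], z[4];
        _mm256_storeu_pd(m, merged);
        _mm256_storeu_pd(z, zeroed);
        printf("%.1f %.1f | %.1f %.1f\n", m[0], m[3], z[0], z[3]); /* 12.0 0.5 | 12.0 0.0 */
        return 0;
    }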
+ + + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ABS(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ABS(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
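The ABS entries above cover masked 32-bit absolute value plus 64-bit absolute value, which first appears at this ISA level (AVX2 stops at 32-bit elements). A minimal C sketch; the names _mm256_mask_abs_epi32 and _mm256_abs_epi64 are assumptions matched to the pseudocode.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed names: _mm256_mask_abs_epi32 (merge from "src") and
     * _mm256_abs_epi64 (unmasked). Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256i src32 = _mm256_set1_epi32(-9);
        __m256i a32   = _mm256_set1_epi32(-5);
        __mmask8 k    = 0x0F;            /* lanes 0..3 active */
        __m256i r32   = _mm256_mask_abs_epi32(src32, k, a32);

        __m256i a64 = _mm256_set1_epi64x(-7);
        __m256i r64 = _mm256_abs_epi64(a64);

        int out32[8];
        long long out64[4];
        _mm256_storeu_si256((__m256i *)out32, r32);
        _mm256_storeu_si256((__m256i *)out64, r64);
        printf("%d %d %lld\n", out32[0], out32[7], out64[0]); /* 5 -9 7 */
        return 0;
    }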
+ + + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
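The entries above add masked integer addition; the merging form is the usual idiom for loop tails, where inactive lanes must keep their previous values. A minimal C sketch; the name _mm256_mask_add_epi32 is an assumption matched to the merge-from-"src" pseudocode.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed name: _mm256_mask_add_epi32 (a + b per lane, merge from "src").
     * Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256i src = _mm256_set1_epi32(100);
        __m256i a   = _mm256_set1_epi32(1);
        __m256i b   = _mm256_set1_epi32(2);
        __mmask8 k  = 0xAA;              /* odd lanes active */

        __m256i r = _mm256_mask_add_epi32(src, k, a, b);

        int out[8];
        _mm256_storeu_si256((__m256i *)out, r);
        printf("%d %d\n", out[0], out[1]); /* 100 and 3 */
        return 0;
    }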
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
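The entries above cover signed and unsigned integer MAX, including the 64-bit element forms that first appear at this ISA level. A minimal C sketch; the names _mm256_mask_max_epi32 and _mm256_max_epu64 are assumptions matched to the pseudocode, and the signed/unsigned distinction is what separates the epi and epu families.

    #include <immintrin.h>
    #include <stdio.h>

    /* Assumed names: _mm256_mask_max_epi32 (signed, merge from "src") and
     * _mm256_max_epu64 (unsigned, unmasked). Build with: -mavx512f -mavx512vl */
    int main(void) {
        __m256i src = _mm256_set1_epi32(0);
        __m256i a   = _mm256_set1_epi32(-4);
        __m256i b   = _mm256_set1_epi32(3);
        __mmask8 k  = 0x0F;              /* lanes 0..3 active */
        __m256i r32 = _mm256_mask_max_epi32(src, k, a, b);

        __m256i u1  = _mm256_set1_epi64x(-1);  /* 0xFFFF.. = UINT64_MAX unsigned */
        __m256i u2  = _mm256_set1_epi64x(5);
        __m256i r64 = _mm256_max_epu64(u1, u2);

        int out32[8];
        unsigned long long out64[4];
        _mm256_storeu_si256((__m256i *)out32, r32);
        _mm256_storeu_si256((__m256i *)out64, r64);
        printf("%d %d %llu\n", out32[0], out32[7], out64[0]); /* 3 0 18446744073709551615 */
        return 0;
    }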
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
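The signed counterpart behaves as expected on negative values; assuming the unmasked 128-bit entry is _mm_min_epi64:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i a = _mm_set_epi64x(-5, 100);
        __m128i b = _mm_set_epi64x(3, -100);
        __m128i r = _mm_min_epi64(a, b);    /* signed: -100 and -5 win */
        long long out[2];
        _mm_storeu_si128((__m128i *)out, r);
        printf("%lld %lld\n", out[0], out[1]); /* -100 -5 */
        return 0;
    }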
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
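Note that for this widening multiply the mask bits index the 64-bit *results*, not the four 32-bit source lanes, and only the even source lanes are read. A sketch assuming the zeromask entry above is _mm_maskz_mul_epi32:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        /* 32-bit lanes, low to high: 3, 88, -4, 99 */
        __m128i a = _mm_set_epi32(99, -4, 88, 3);
        __m128i b = _mm_set_epi32(77, 5, 66, -2);
        /* products of lanes 0 and 2; k = 0x1 keeps only result lane 0 */
        __m128i r = _mm_maskz_mul_epi32(0x1, a, b);
        long long out[2];
        _mm_storeu_si128((__m128i *)out, r);
        printf("%lld %lld\n", out[0], out[1]); /* -6 0 */
        return 0;
    }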
+ + + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
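Unlike the widening form above, the "mullo" variant keeps only the low 32 bits of each product, so overflow silently wraps. A sketch, assuming the zeromask entry is _mm_maskz_mullo_epi32:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i a = _mm_set1_epi32(0x10000);            /* 65536 */
        __m128i b = _mm_set_epi32(4, 3, 2, 0x10000);
        /* lane 0's full product is 2^32; only its low 32 bits (0) survive */
        __m128i r = _mm_maskz_mullo_epi32(0xF, a, b);
        int out[4];
        _mm_storeu_si128((__m128i *)out, r);
        printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* 0 131072 196608 262144 */
        return 0;
    }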
+ + + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
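Masked subtraction is the usual way to apply an update to only some lanes while the rest pass through untouched. A sketch assuming the 128-bit writemask entry is _mm_mask_sub_epi64:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i acc = _mm_set_epi64x(50, 10);
        __m128i dec = _mm_set_epi64x(7, 3);
        /* subtract only in lane 1; lane 0 is copied from the first argument */
        __m128i r = _mm_mask_sub_epi64(acc, 0x2, acc, dec);
        long long out[2];
        _mm_storeu_si128((__m128i *)out, r);
        printf("%lld %lld\n", out[0], out[1]); /* 10 43 */
        return 0;
    }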
+ + + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := (1.0 / a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (1.0 / a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
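The 2^-14 error bound makes RCP14 a seed for iterative refinement rather than a replacement for division. A sketch of one Newton-Raphson step, which roughly squares the accuracy, assuming the unmasked 128-bit entry is _mm_rcp14_pd:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128d a  = _mm_set_pd(8.0, 3.0);
        __m128d x0 = _mm_rcp14_pd(a);               /* |rel err| < 2^-14 */
        /* Newton-Raphson for the reciprocal: x1 = x0 * (2 - a*x0) */
        __m128d x1 = _mm_mul_pd(x0,
            _mm_sub_pd(_mm_set1_pd(2.0), _mm_mul_pd(a, x0)));
        double out[2];
        _mm_storeu_pd(out, x1);
        printf("%.15f %.15f\n", out[0], out[1]);    /* ~0.333333 ~0.125 */
        return 0;
    }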
+ + + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (1.0 / a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (1.0 / a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
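A common use of the zeromask form is suppressing the lanes where 1/sqrt would blow up. A sketch assuming the 128-bit zeromask entry is _mm_maskz_rsqrt14_ps:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 v2 = _mm_set_ps(0.0f, 9.0f, 4.0f, 1.0f);   /* squared lengths */
        /* zero out lane 3 instead of computing 1/sqrt(0) = +inf */
        __m128 inv = _mm_maskz_rsqrt14_ps(0x7, v2);
        float out[4];
        _mm_storeu_ps(out, inv);
        printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]); /* ~1 ~0.5 ~0.333 0 */
        return 0;
    }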
+ + + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Arithmetic +
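Masks pair naturally with AVX-512 compare intrinsics, giving branch-free conditional arithmetic. A sketch combining a compare with the masked subtract, assuming the writemask entry above is _mm_mask_sub_ps (and using _mm_cmp_ps_mask, which AVX512VL provides):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
        __m128 b = _mm_set1_ps(0.5f);
        /* subtract 0.5 only where a > 2.5; other lanes pass through */
        __mmask8 k = _mm_cmp_ps_mask(a, _mm_set1_ps(2.5f), _CMP_GT_OQ);
        __m128 r = _mm_mask_sub_ps(a, k, a, b);
        float out[4];
        _mm_storeu_ps(out, r);
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 2 2.5 3.5 */
        return 0;
    }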
+ + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst". + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (32*imm8[2:0]) +dst[255:0] := temp[255:0] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (32*imm8[2:0]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (32*imm8[2:0]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst". + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (32*imm8[1:0]) +dst[127:0] := temp[127:0] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (32*imm8[1:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (32*imm8[1:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
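Unlike the byte-granular SSSE3 PALIGNR, this form shifts whole 32-bit elements across the full concatenated vector, including the 128-bit lane boundary, which makes it convenient for sliding windows over two registers. A sketch assuming the unmasked 256-bit entry is _mm256_alignr_epi32:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m256i hi = _mm256_setr_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        __m256i lo = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        /* take elements 3..10 of the 16-element concatenation hi:lo */
        __m256i w = _mm256_alignr_epi32(hi, lo, 3);
        int out[8];
        _mm256_storeu_si256((__m256i *)out, w);
        for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* 3 4 5 6 7 8 9 10 */
        printf("\n");
        return 0;
    }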
+ + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst". + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (64*imm8[1:0]) +dst[255:0] := temp[255:0] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (64*imm8[1:0]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[511:256] := a[255:0] +temp[255:0] := b[255:0] +temp[511:0] := temp[511:0] >> (64*imm8[1:0]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst". + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (64*imm8[0]) +dst[127:0] := temp[127:0] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (64*imm8[0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[255:128] := a[127:0] +temp[127:0] := b[127:0] +temp[255:0] := temp[255:0] >> (64*imm8[0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
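Blend is the mask operation reduced to its essence: each mask bit simply picks between the two sources. A sketch assuming the 128-bit single-precision entry is _mm_mask_blend_ps:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 a = _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f);
        __m128 b = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
        /* k = 0b1010: lanes 1 and 3 come from b, the rest from a */
        __m128 r = _mm_mask_blend_ps(0xA, a, b);
        float out[4];
        _mm_storeu_ps(out, r);
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 10 2 30 4 */
        return 0;
    }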
+ + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
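Broadcasting a whole 128-bit block (rather than a single scalar) is handy when a 4-element structure such as an RGBA pixel or quaternion must be applied to every block of a wider vector. A sketch assuming the unmasked entry is _mm256_broadcast_f32x4:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 quad = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
        /* replicate the 4-element block into both 128-bit halves */
        __m256 r = _mm256_broadcast_f32x4(quad);
        float out[8];
        _mm256_storeu_ps(out, r);
        for (int i = 0; i < 8; i++) printf("%g ", out[i]);  /* 1 2 3 4 1 2 3 4 */
        printf("\n");
        return 0;
    }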
+ + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
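Compression is the standard AVX-512 idiom for left-packing selected elements, e.g. filtering a stream without branches. A sketch assuming the 256-bit double-precision zeromask entry is _mm256_maskz_compress_pd (with _mm256_cmp_pd_mask supplying the predicate):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m256d v = _mm256_setr_pd(1.0, -2.0, 3.0, -4.0);
        /* keep only the non-negative elements, packed to the front */
        __mmask8 k = _mm256_cmp_pd_mask(v, _mm256_setzero_pd(), _CMP_GE_OQ);
        __m256d packed = _mm256_maskz_compress_pd(k, v);
        double out[4];
        _mm256_storeu_pd(out, packed);
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 3 0 0 */
        return 0;
    }

In a filtering loop one would then advance the output pointer by the popcount of k.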
+ + + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
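Expand is the inverse of compress: it scatters a densely packed prefix back into the lanes selected by the mask. A sketch assuming the 256-bit double-precision zeromask entry is _mm256_maskz_expand_pd:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        /* the first two packed values go to lanes 1 and 3 (k = 0b1010) */
        __m256d packed = _mm256_setr_pd(10.0, 20.0, 0.0, 0.0);
        __m256d r = _mm256_maskz_expand_pd(0xA, packed);
        double out[4];
        _mm256_storeu_pd(out, r);
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 10 0 20 */
        return 0;
    }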
+ + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +ESAC +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
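Extraction pulls one 128-bit half out of the 256-bit register, with imm8[0] choosing the half. A sketch assuming the unmasked single-precision entry is _mm256_extractf32x4_ps:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m256 v = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);
        __m128 hi = _mm256_extractf32x4_ps(v, 1);   /* imm8 = 1: upper half */
        float out[4];
        _mm_storeu_ps(out, hi);
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 4 5 6 7 */
        return 0;
    }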
+






 Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.
	enum TOKEN_TYPE {
	QNAN_TOKEN := 0, \
	SNAN_TOKEN := 1, \
	ZERO_VALUE_TOKEN := 2, \
	ONE_VALUE_TOKEN := 3, \
	NEG_INF_TOKEN := 4, \
	POS_INF_TOKEN := 5, \
	NEG_VALUE_TOKEN := 6, \
	POS_VALUE_TOKEN := 7
}
DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
	CASE(tsrc[63:0]) OF
	QNAN_TOKEN: j := 0
	SNAN_TOKEN: j := 1
	ZERO_VALUE_TOKEN: j := 2
	ONE_VALUE_TOKEN: j := 3
	NEG_INF_TOKEN: j := 4
	POS_INF_TOKEN: j := 5
	NEG_VALUE_TOKEN: j := 6
	POS_VALUE_TOKEN: j := 7
	ESAC

	token_response[3:0] := src3[3+4*j:4*j]

	CASE(token_response[3:0]) OF
	0 : dest[63:0] := src1[63:0]
	1 : dest[63:0] := tsrc[63:0]
	2 : dest[63:0] := QNaN(tsrc[63:0])
	3 : dest[63:0] := QNAN_Indefinite
	4 : dest[63:0] := -INF
	5 : dest[63:0] := +INF
	6 : dest[63:0] := tsrc.sign? -INF : +INF
	7 : dest[63:0] := -0
	8 : dest[63:0] := +0
	9 : dest[63:0] := -1
	10: dest[63:0] := +1
	11: dest[63:0] := 1/2
	12: dest[63:0] := 90.0
	13: dest[63:0] := PI/2
	14: dest[63:0] := MAX_FLOAT
	15: dest[63:0] := -MAX_FLOAT
	ESAC

	CASE(tsrc[63:0]) OF
	ZERO_VALUE_TOKEN:
	IF (imm8[0]) #ZE; FI
	ZERO_VALUE_TOKEN:
	IF (imm8[1]) #IE; FI
	ONE_VALUE_TOKEN:
	IF (imm8[2]) #ZE; FI
	ONE_VALUE_TOKEN:
	IF (imm8[3]) #IE; FI
	SNAN_TOKEN:
	IF (imm8[4]) #IE; FI
	NEG_INF_TOKEN:
	IF (imm8[5]) #IE; FI
	NEG_VALUE_TOKEN:
	IF (imm8[6]) #IE; FI
	POS_INF_TOKEN:
	IF (imm8[7]) #IE; FI
	ESAC
	RETURN dest[63:0]
}
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
ENDFOR
dst[MAX:256] := 0

	AVX512F
	AVX512VL
immintrin.h
+ Miscellaneous +
+







 Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
	enum TOKEN_TYPE {
	QNAN_TOKEN := 0, \
	SNAN_TOKEN := 1, \
	ZERO_VALUE_TOKEN := 2, \
	ONE_VALUE_TOKEN := 3, \
	NEG_INF_TOKEN := 4, \
	POS_INF_TOKEN := 5, \
	NEG_VALUE_TOKEN := 6, \
	POS_VALUE_TOKEN := 7
}
DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
	CASE(tsrc[63:0]) OF
	QNAN_TOKEN: j := 0
	SNAN_TOKEN: j := 1
	ZERO_VALUE_TOKEN: j := 2
	ONE_VALUE_TOKEN: j := 3
	NEG_INF_TOKEN: j := 4
	POS_INF_TOKEN: j := 5
	NEG_VALUE_TOKEN: j := 6
	POS_VALUE_TOKEN: j := 7
	ESAC

	token_response[3:0] := src3[3+4*j:4*j]

	CASE(token_response[3:0]) OF
	0 : dest[63:0] := src1[63:0]
	1 : dest[63:0] := tsrc[63:0]
	2 : dest[63:0] := QNaN(tsrc[63:0])
	3 : dest[63:0] := QNAN_Indefinite
	4 : dest[63:0] := -INF
	5 : dest[63:0] := +INF
	6 : dest[63:0] := tsrc.sign? -INF : +INF
	7 : dest[63:0] := -0
	8 : dest[63:0] := +0
	9 : dest[63:0] := -1
	10: dest[63:0] := +1
	11: dest[63:0] := 1/2
	12: dest[63:0] := 90.0
	13: dest[63:0] := PI/2
	14: dest[63:0] := MAX_FLOAT
	15: dest[63:0] := -MAX_FLOAT
	ESAC

	CASE(tsrc[63:0]) OF
	ZERO_VALUE_TOKEN:
	IF (imm8[0]) #ZE; FI
	ZERO_VALUE_TOKEN:
	IF (imm8[1]) #IE; FI
	ONE_VALUE_TOKEN:
	IF (imm8[2]) #ZE; FI
	ONE_VALUE_TOKEN:
	IF (imm8[3]) #IE; FI
	SNAN_TOKEN:
	IF (imm8[4]) #IE; FI
	NEG_INF_TOKEN:
	IF (imm8[5]) #IE; FI
	NEG_VALUE_TOKEN:
	IF (imm8[6]) #IE; FI
	POS_INF_TOKEN:
	IF (imm8[7]) #IE; FI
	ESAC
	RETURN dest[63:0]
}
FOR j := 0 to 3
	i := j*64
	IF k[j]
		dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
	ELSE
		dst[i+63:i] := a[i+63:i]
	FI
ENDFOR
dst[MAX:256] := 0

	AVX512F
	AVX512VL
immintrin.h
+ Miscellaneous +
+







 Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
	enum TOKEN_TYPE {
	QNAN_TOKEN := 0, \
	SNAN_TOKEN := 1, \
	ZERO_VALUE_TOKEN := 2, \
	ONE_VALUE_TOKEN := 3, \
	NEG_INF_TOKEN := 4, \
	POS_INF_TOKEN := 5, \
	NEG_VALUE_TOKEN := 6, \
	POS_VALUE_TOKEN := 7
}
DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
	CASE(tsrc[63:0]) OF
	QNAN_TOKEN: j := 0
	SNAN_TOKEN: j := 1
	ZERO_VALUE_TOKEN: j := 2
	ONE_VALUE_TOKEN: j := 3
	NEG_INF_TOKEN: j := 4
	POS_INF_TOKEN: j := 5
	NEG_VALUE_TOKEN: j := 6
	POS_VALUE_TOKEN: j := 7
	ESAC

	token_response[3:0] := src3[3+4*j:4*j]

	CASE(token_response[3:0]) OF
	0 : dest[63:0] := src1[63:0]
	1 : dest[63:0] := tsrc[63:0]
	2 : dest[63:0] := QNaN(tsrc[63:0])
	3 : dest[63:0] := QNAN_Indefinite
	4 : dest[63:0] := -INF
	5 : dest[63:0] := +INF
	6 : dest[63:0] := tsrc.sign? -INF : +INF
	7 : dest[63:0] := -0
	8 : dest[63:0] := +0
	9 : dest[63:0] := -1
	10: dest[63:0] := +1
	11: dest[63:0] := 1/2
	12: dest[63:0] := 90.0
	13: dest[63:0] := PI/2
	14: dest[63:0] := MAX_FLOAT
	15: dest[63:0] := -MAX_FLOAT
	ESAC

	CASE(tsrc[63:0]) OF
	ZERO_VALUE_TOKEN:
	IF (imm8[0]) #ZE; FI
	ZERO_VALUE_TOKEN:
	IF (imm8[1]) #IE; FI
	ONE_VALUE_TOKEN:
	IF (imm8[2]) #ZE; FI
	ONE_VALUE_TOKEN:
	IF (imm8[3]) #IE; FI
	SNAN_TOKEN:
	IF (imm8[4]) #IE; FI
	NEG_INF_TOKEN:
	IF (imm8[5]) #IE; FI
	NEG_VALUE_TOKEN:
	IF (imm8[6]) #IE; FI
	POS_INF_TOKEN:
	IF (imm8[7]) #IE; FI
	ESAC
	RETURN dest[63:0]
}
FOR j := 0 to 3
	i := j*64
	IF k[j]
		dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
	ELSE
		dst[i+63:i] := 0
	FI
ENDFOR
dst[MAX:256] := 0

	AVX512F
	AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+ QNAN_TOKEN := 0, \
+ SNAN_TOKEN := 1, \
+ ZERO_VALUE_TOKEN := 2, \
+ ONE_VALUE_TOKEN := 3, \
+ NEG_INF_TOKEN := 4, \
+ POS_INF_TOKEN := 5, \
+ NEG_VALUE_TOKEN := 6, \
+ POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+ tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+ CASE(tsrc[63:0]) OF
+ QNAN_TOKEN: j := 0
+ SNAN_TOKEN: j := 1
+ ZERO_VALUE_TOKEN: j := 2
+ ONE_VALUE_TOKEN: j := 3
+ NEG_INF_TOKEN: j := 4
+ POS_INF_TOKEN: j := 5
+ NEG_VALUE_TOKEN: j := 6
+ POS_VALUE_TOKEN: j := 7
+ ESAC
+
+ token_response[3:0] := src3[3+4*j:4*j]
+
+ CASE(token_response[3:0]) OF
+ 0 : dest[63:0] := src1[63:0]
+ 1 : dest[63:0] := tsrc[63:0]
+ 2 : dest[63:0] := QNaN(tsrc[63:0])
+ 3 : dest[63:0] := QNAN_Indefinite
+ 4 : dest[63:0] := -INF
+ 5 : dest[63:0] := +INF
+ 6 : dest[63:0] := tsrc.sign? -INF : +INF
+ 7 : dest[63:0] := -0
+ 8 : dest[63:0] := +0
+ 9 : dest[63:0] := -1
+ 10: dest[63:0] := +1
+ 11: dest[63:0] := 1/2
+ 12: dest[63:0] := 90.0
+ 13: dest[63:0] := PI/2
+ 14: dest[63:0] := MAX_FLOAT
+ 15: dest[63:0] := -MAX_FLOAT
+ ESAC
+
+ CASE(tsrc[63:0]) OF
+ ZERO_VALUE_TOKEN:
+ IF (imm8[0]) #ZE; FI
+ ZERO_VALUE_TOKEN:
+ IF (imm8[1]) #IE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[2]) #ZE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[3]) #IE; FI
+ SNAN_TOKEN:
+ IF (imm8[4]) #IE; FI
+ NEG_INF_TOKEN:
+ IF (imm8[5]) #IE; FI
+ NEG_VALUE_TOKEN:
+ IF (imm8[6]) #IE; FI
+ POS_INF_TOKEN:
+ IF (imm8[7]) #IE; FI
+ ESAC
+ RETURN dest[63:0]
+}
+FOR j := 0 to 1
+ i := j*64
+ dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+ QNAN_TOKEN := 0, \
+ SNAN_TOKEN := 1, \
+ ZERO_VALUE_TOKEN := 2, \
+ ONE_VALUE_TOKEN := 3, \
+ NEG_INF_TOKEN := 4, \
+ POS_INF_TOKEN := 5, \
+ NEG_VALUE_TOKEN := 6, \
+ POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+ tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+ CASE(tsrc[63:0]) OF
+ QNAN_TOKEN: j := 0
+ SNAN_TOKEN: j := 1
+ ZERO_VALUE_TOKEN: j := 2
+ ONE_VALUE_TOKEN: j := 3
+ NEG_INF_TOKEN: j := 4
+ POS_INF_TOKEN: j := 5
+ NEG_VALUE_TOKEN: j := 6
+ POS_VALUE_TOKEN: j := 7
+ ESAC
+
+ token_response[3:0] := src3[3+4*j:4*j]
+
+ CASE(token_response[3:0]) OF
+ 0 : dest[63:0] := src1[63:0]
+ 1 : dest[63:0] := tsrc[63:0]
+ 2 : dest[63:0] := QNaN(tsrc[63:0])
+ 3 : dest[63:0] := QNAN_Indefinite
+ 4 : dest[63:0] := -INF
+ 5 : dest[63:0] := +INF
+ 6 : dest[63:0] := tsrc.sign? -INF : +INF
+ 7 : dest[63:0] := -0
+ 8 : dest[63:0] := +0
+ 9 : dest[63:0] := -1
+ 10: dest[63:0] := +1
+ 11: dest[63:0] := 1/2
+ 12: dest[63:0] := 90.0
+ 13: dest[63:0] := PI/2
+ 14: dest[63:0] := MAX_FLOAT
+ 15: dest[63:0] := -MAX_FLOAT
+ ESAC
+
+ CASE(tsrc[63:0]) OF
+ ZERO_VALUE_TOKEN:
+ IF (imm8[0]) #ZE; FI
+ ZERO_VALUE_TOKEN:
+ IF (imm8[1]) #IE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[2]) #ZE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[3]) #IE; FI
+ SNAN_TOKEN:
+ IF (imm8[4]) #IE; FI
+ NEG_INF_TOKEN:
+ IF (imm8[5]) #IE; FI
+ NEG_VALUE_TOKEN:
+ IF (imm8[6]) #IE; FI
+ POS_INF_TOKEN:
+ IF (imm8[7]) #IE; FI
+ ESAC
+ RETURN dest[63:0]
+}
+FOR j := 0 to 1
+ i := j*64
+ IF k[j]
+ dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+ ELSE
+ dst[i+63:i] := a[i+63:i]
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+ QNAN_TOKEN := 0, \
+ SNAN_TOKEN := 1, \
+ ZERO_VALUE_TOKEN := 2, \
+ ONE_VALUE_TOKEN := 3, \
+ NEG_INF_TOKEN := 4, \
+ POS_INF_TOKEN := 5, \
+ NEG_VALUE_TOKEN := 6, \
+ POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+ tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+ CASE(tsrc[63:0]) OF
+ QNAN_TOKEN: j := 0
+ SNAN_TOKEN: j := 1
+ ZERO_VALUE_TOKEN: j := 2
+ ONE_VALUE_TOKEN: j := 3
+ NEG_INF_TOKEN: j := 4
+ POS_INF_TOKEN: j := 5
+ NEG_VALUE_TOKEN: j := 6
+ POS_VALUE_TOKEN: j := 7
+ ESAC
+
+ token_response[3:0] := src3[3+4*j:4*j]
+
+ CASE(token_response[3:0]) OF
+ 0 : dest[63:0] := src1[63:0]
+ 1 : dest[63:0] := tsrc[63:0]
+ 2 : dest[63:0] := QNaN(tsrc[63:0])
+ 3 : dest[63:0] := QNAN_Indefinite
+ 4 : dest[63:0] := -INF
+ 5 : dest[63:0] := +INF
+ 6 : dest[63:0] := tsrc.sign? -INF : +INF
+ 7 : dest[63:0] := -0
+ 8 : dest[63:0] := +0
+ 9 : dest[63:0] := -1
+ 10: dest[63:0] := +1
+ 11: dest[63:0] := 1/2
+ 12: dest[63:0] := 90.0
+ 13: dest[63:0] := PI/2
+ 14: dest[63:0] := MAX_FLOAT
+ 15: dest[63:0] := -MAX_FLOAT
+ ESAC
+
+ CASE(tsrc[63:0]) OF
+ ZERO_VALUE_TOKEN:
+ IF (imm8[0]) #ZE; FI
+ ZERO_VALUE_TOKEN:
+ IF (imm8[1]) #IE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[2]) #ZE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[3]) #IE; FI
+ SNAN_TOKEN:
+ IF (imm8[4]) #IE; FI
+ NEG_INF_TOKEN:
+ IF (imm8[5]) #IE; FI
+ NEG_VALUE_TOKEN:
+ IF (imm8[6]) #IE; FI
+ POS_INF_TOKEN:
+ IF (imm8[7]) #IE; FI
+ ESAC
+ RETURN dest[63:0]
+}
+FOR j := 0 to 1
+ i := j*64
+ IF k[j]
+ dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+ ELSE
+ dst[i+63:i] := 0
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+ QNAN_TOKEN := 0, \
+ SNAN_TOKEN := 1, \
+ ZERO_VALUE_TOKEN := 2, \
+ ONE_VALUE_TOKEN := 3, \
+ NEG_INF_TOKEN := 4, \
+ POS_INF_TOKEN := 5, \
+ NEG_VALUE_TOKEN := 6, \
+ POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+ tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+ CASE(tsrc[31:0]) OF
+ QNAN_TOKEN: j := 0
+ SNAN_TOKEN: j := 1
+ ZERO_VALUE_TOKEN: j := 2
+ ONE_VALUE_TOKEN: j := 3
+ NEG_INF_TOKEN: j := 4
+ POS_INF_TOKEN: j := 5
+ NEG_VALUE_TOKEN: j := 6
+ POS_VALUE_TOKEN: j := 7
+ ESAC
+
+ token_response[3:0] := src3[3+4*j:4*j]
+
+ CASE(token_response[3:0]) OF
+ 0 : dest[31:0] := src1[31:0]
+ 1 : dest[31:0] := tsrc[31:0]
+ 2 : dest[31:0] := QNaN(tsrc[31:0])
+ 3 : dest[31:0] := QNAN_Indefinite
+ 4 : dest[31:0] := -INF
+ 5 : dest[31:0] := +INF
+ 6 : dest[31:0] := tsrc.sign? -INF : +INF
+ 7 : dest[31:0] := -0
+ 8 : dest[31:0] := +0
+ 9 : dest[31:0] := -1
+ 10: dest[31:0] := +1
+ 11: dest[31:0] := 1/2
+ 12: dest[31:0] := 90.0
+ 13: dest[31:0] := PI/2
+ 14: dest[31:0] := MAX_FLOAT
+ 15: dest[31:0] := -MAX_FLOAT
+ ESAC
+
+ CASE(tsrc[31:0]) OF
+ ZERO_VALUE_TOKEN:
+ IF (imm8[0]) #ZE; FI
+ ZERO_VALUE_TOKEN:
+ IF (imm8[1]) #IE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[2]) #ZE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[3]) #IE; FI
+ SNAN_TOKEN:
+ IF (imm8[4]) #IE; FI
+ NEG_INF_TOKEN:
+ IF (imm8[5]) #IE; FI
+ NEG_VALUE_TOKEN:
+ IF (imm8[6]) #IE; FI
+ POS_INF_TOKEN:
+ IF (imm8[7]) #IE; FI
+ ESAC
+ RETURN dest[31:0]
+}
+FOR j := 0 to 7
+ i := j*32
+ dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+ QNAN_TOKEN := 0, \
+ SNAN_TOKEN := 1, \
+ ZERO_VALUE_TOKEN := 2, \
+ ONE_VALUE_TOKEN := 3, \
+ NEG_INF_TOKEN := 4, \
+ POS_INF_TOKEN := 5, \
+ NEG_VALUE_TOKEN := 6, \
+ POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+ tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+ CASE(tsrc[31:0]) OF
+ QNAN_TOKEN: j := 0
+ SNAN_TOKEN: j := 1
+ ZERO_VALUE_TOKEN: j := 2
+ ONE_VALUE_TOKEN: j := 3
+ NEG_INF_TOKEN: j := 4
+ POS_INF_TOKEN: j := 5
+ NEG_VALUE_TOKEN: j := 6
+ POS_VALUE_TOKEN: j := 7
+ ESAC
+
+ token_response[3:0] := src3[3+4*j:4*j]
+
+ CASE(token_response[3:0]) OF
+ 0 : dest[31:0] := src1[31:0]
+ 1 : dest[31:0] := tsrc[31:0]
+ 2 : dest[31:0] := QNaN(tsrc[31:0])
+ 3 : dest[31:0] := QNAN_Indefinite
+ 4 : dest[31:0] := -INF
+ 5 : dest[31:0] := +INF
+ 6 : dest[31:0] := tsrc.sign? -INF : +INF
+ 7 : dest[31:0] := -0
+ 8 : dest[31:0] := +0
+ 9 : dest[31:0] := -1
+ 10: dest[31:0] := +1
+ 11: dest[31:0] := 1/2
+ 12: dest[31:0] := 90.0
+ 13: dest[31:0] := PI/2
+ 14: dest[31:0] := MAX_FLOAT
+ 15: dest[31:0] := -MAX_FLOAT
+ ESAC
+
+ CASE(tsrc[31:0]) OF
+ ZERO_VALUE_TOKEN:
+ IF (imm8[0]) #ZE; FI
+ ZERO_VALUE_TOKEN:
+ IF (imm8[1]) #IE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[2]) #ZE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[3]) #IE; FI
+ SNAN_TOKEN:
+ IF (imm8[4]) #IE; FI
+ NEG_INF_TOKEN:
+ IF (imm8[5]) #IE; FI
+ NEG_VALUE_TOKEN:
+ IF (imm8[6]) #IE; FI
+ POS_INF_TOKEN:
+ IF (imm8[7]) #IE; FI
+ ESAC
+ RETURN dest[31:0]
+}
+FOR j := 0 to 7
+ i := j*32
+ IF k[j]
+ dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+ ELSE
+ dst[i+31:i] := a[i+31:i]
+ FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+ QNAN_TOKEN := 0, \
+ SNAN_TOKEN := 1, \
+ ZERO_VALUE_TOKEN := 2, \
+ ONE_VALUE_TOKEN := 3, \
+ NEG_INF_TOKEN := 4, \
+ POS_INF_TOKEN := 5, \
+ NEG_VALUE_TOKEN := 6, \
+ POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+ tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+ CASE(tsrc[31:0]) OF
+ QNAN_TOKEN: j := 0
+ SNAN_TOKEN: j := 1
+ ZERO_VALUE_TOKEN: j := 2
+ ONE_VALUE_TOKEN: j := 3
+ NEG_INF_TOKEN: j := 4
+ POS_INF_TOKEN: j := 5
+ NEG_VALUE_TOKEN: j := 6
+ POS_VALUE_TOKEN: j := 7
+ ESAC
+
+ token_response[3:0] := src3[3+4*j:4*j]
+
+ CASE(token_response[3:0]) OF
+ 0 : dest[31:0] := src1[31:0]
+ 1 : dest[31:0] := tsrc[31:0]
+ 2 : dest[31:0] := QNaN(tsrc[31:0])
+ 3 : dest[31:0] := QNAN_Indefinite
+ 4 : dest[31:0] := -INF
+ 5 : dest[31:0] := +INF
+ 6 : dest[31:0] := tsrc.sign? -INF : +INF
+ 7 : dest[31:0] := -0
+ 8 : dest[31:0] := +0
+ 9 : dest[31:0] := -1
+ 10: dest[31:0] := +1
+ 11: dest[31:0] := 1/2
+ 12: dest[31:0] := 90.0
+ 13: dest[31:0] := PI/2
+ 14: dest[31:0] := MAX_FLOAT
+ 15: dest[31:0] := -MAX_FLOAT
+ ESAC
+
+ CASE(tsrc[31:0]) OF
+ ZERO_VALUE_TOKEN:
+ IF (imm8[0]) #ZE; FI
+ ZERO_VALUE_TOKEN:
+ IF (imm8[1]) #IE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[2]) #ZE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[3]) #IE; FI
+ SNAN_TOKEN:
+ IF (imm8[4]) #IE; FI
+ NEG_INF_TOKEN:
+ IF (imm8[5]) #IE; FI
+ NEG_VALUE_TOKEN:
+ IF (imm8[6]) #IE; FI
+ POS_INF_TOKEN:
+ IF (imm8[7]) #IE; FI
+ ESAC
+ RETURN dest[31:0]
+}
+FOR j := 0 to 7
+ i := j*32
+ IF k[j]
+ dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+ ELSE
+ dst[i+31:i] := 0
+ FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+ QNAN_TOKEN := 0, \
+ SNAN_TOKEN := 1, \
+ ZERO_VALUE_TOKEN := 2, \
+ ONE_VALUE_TOKEN := 3, \
+ NEG_INF_TOKEN := 4, \
+ POS_INF_TOKEN := 5, \
+ NEG_VALUE_TOKEN := 6, \
+ POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+ tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+ CASE(tsrc[31:0]) OF
+ QNAN_TOKEN: j := 0
+ SNAN_TOKEN: j := 1
+ ZERO_VALUE_TOKEN: j := 2
+ ONE_VALUE_TOKEN: j := 3
+ NEG_INF_TOKEN: j := 4
+ POS_INF_TOKEN: j := 5
+ NEG_VALUE_TOKEN: j := 6
+ POS_VALUE_TOKEN: j := 7
+ ESAC
+
+ token_response[3:0] := src3[3+4*j:4*j]
+
+ CASE(token_response[3:0]) OF
+ 0 : dest[31:0] := src1[31:0]
+ 1 : dest[31:0] := tsrc[31:0]
+ 2 : dest[31:0] := QNaN(tsrc[31:0])
+ 3 : dest[31:0] := QNAN_Indefinite
+ 4 : dest[31:0] := -INF
+ 5 : dest[31:0] := +INF
+ 6 : dest[31:0] := tsrc.sign? -INF : +INF
+ 7 : dest[31:0] := -0
+ 8 : dest[31:0] := +0
+ 9 : dest[31:0] := -1
+ 10: dest[31:0] := +1
+ 11: dest[31:0] := 1/2
+ 12: dest[31:0] := 90.0
+ 13: dest[31:0] := PI/2
+ 14: dest[31:0] := MAX_FLOAT
+ 15: dest[31:0] := -MAX_FLOAT
+ ESAC
+
+ CASE(tsrc[31:0]) OF
+ ZERO_VALUE_TOKEN:
+ IF (imm8[0]) #ZE; FI
+ ZERO_VALUE_TOKEN:
+ IF (imm8[1]) #IE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[2]) #ZE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[3]) #IE; FI
+ SNAN_TOKEN:
+ IF (imm8[4]) #IE; FI
+ NEG_INF_TOKEN:
+ IF (imm8[5]) #IE; FI
+ NEG_VALUE_TOKEN:
+ IF (imm8[6]) #IE; FI
+ POS_INF_TOKEN:
+ IF (imm8[7]) #IE; FI
+ ESAC
+ RETURN dest[31:0]
+}
+FOR j := 0 to 3
+ i := j*32
+ dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+ QNAN_TOKEN := 0, \
+ SNAN_TOKEN := 1, \
+ ZERO_VALUE_TOKEN := 2, \
+ ONE_VALUE_TOKEN := 3, \
+ NEG_INF_TOKEN := 4, \
+ POS_INF_TOKEN := 5, \
+ NEG_VALUE_TOKEN := 6, \
+ POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+ tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+ CASE(tsrc[31:0]) OF
+ QNAN_TOKEN: j := 0
+ SNAN_TOKEN: j := 1
+ ZERO_VALUE_TOKEN: j := 2
+ ONE_VALUE_TOKEN: j := 3
+ NEG_INF_TOKEN: j := 4
+ POS_INF_TOKEN: j := 5
+ NEG_VALUE_TOKEN: j := 6
+ POS_VALUE_TOKEN: j := 7
+ ESAC
+
+ token_response[3:0] := src3[3+4*j:4*j]
+
+ CASE(token_response[3:0]) OF
+ 0 : dest[31:0] := src1[31:0]
+ 1 : dest[31:0] := tsrc[31:0]
+ 2 : dest[31:0] := QNaN(tsrc[31:0])
+ 3 : dest[31:0] := QNAN_Indefinite
+ 4 : dest[31:0] := -INF
+ 5 : dest[31:0] := +INF
+ 6 : dest[31:0] := tsrc.sign? -INF : +INF
+ 7 : dest[31:0] := -0
+ 8 : dest[31:0] := +0
+ 9 : dest[31:0] := -1
+ 10: dest[31:0] := +1
+ 11: dest[31:0] := 1/2
+ 12: dest[31:0] := 90.0
+ 13: dest[31:0] := PI/2
+ 14: dest[31:0] := MAX_FLOAT
+ 15: dest[31:0] := -MAX_FLOAT
+ ESAC
+
+ CASE(tsrc[31:0]) OF
+ ZERO_VALUE_TOKEN:
+ IF (imm8[0]) #ZE; FI
+ ZERO_VALUE_TOKEN:
+ IF (imm8[1]) #IE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[2]) #ZE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[3]) #IE; FI
+ SNAN_TOKEN:
+ IF (imm8[4]) #IE; FI
+ NEG_INF_TOKEN:
+ IF (imm8[5]) #IE; FI
+ NEG_VALUE_TOKEN:
+ IF (imm8[6]) #IE; FI
+ POS_INF_TOKEN:
+ IF (imm8[7]) #IE; FI
+ ESAC
+ RETURN dest[31:0]
+}
+FOR j := 0 to 3
+ i := j*32
+ IF k[j]
+ dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+ ELSE
+ dst[i+31:i] := a[i+31:i]
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+ QNAN_TOKEN := 0, \
+ SNAN_TOKEN := 1, \
+ ZERO_VALUE_TOKEN := 2, \
+ ONE_VALUE_TOKEN := 3, \
+ NEG_INF_TOKEN := 4, \
+ POS_INF_TOKEN := 5, \
+ NEG_VALUE_TOKEN := 6, \
+ POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+ tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+ CASE(tsrc[31:0]) OF
+ QNAN_TOKEN: j := 0
+ SNAN_TOKEN: j := 1
+ ZERO_VALUE_TOKEN: j := 2
+ ONE_VALUE_TOKEN: j := 3
+ NEG_INF_TOKEN: j := 4
+ POS_INF_TOKEN: j := 5
+ NEG_VALUE_TOKEN: j := 6
+ POS_VALUE_TOKEN: j := 7
+ ESAC
+
+ token_response[3:0] := src3[3+4*j:4*j]
+
+ CASE(token_response[3:0]) OF
+ 0 : dest[31:0] := src1[31:0]
+ 1 : dest[31:0] := tsrc[31:0]
+ 2 : dest[31:0] := QNaN(tsrc[31:0])
+ 3 : dest[31:0] := QNAN_Indefinite
+ 4 : dest[31:0] := -INF
+ 5 : dest[31:0] := +INF
+ 6 : dest[31:0] := tsrc.sign? -INF : +INF
+ 7 : dest[31:0] := -0
+ 8 : dest[31:0] := +0
+ 9 : dest[31:0] := -1
+ 10: dest[31:0] := +1
+ 11: dest[31:0] := 1/2
+ 12: dest[31:0] := 90.0
+ 13: dest[31:0] := PI/2
+ 14: dest[31:0] := MAX_FLOAT
+ 15: dest[31:0] := -MAX_FLOAT
+ ESAC
+
+ CASE(tsrc[31:0]) OF
+ ZERO_VALUE_TOKEN:
+ IF (imm8[0]) #ZE; FI
+ ZERO_VALUE_TOKEN:
+ IF (imm8[1]) #IE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[2]) #ZE; FI
+ ONE_VALUE_TOKEN:
+ IF (imm8[3]) #IE; FI
+ SNAN_TOKEN:
+ IF (imm8[4]) #IE; FI
+ NEG_INF_TOKEN:
+ IF (imm8[5]) #IE; FI
+ NEG_VALUE_TOKEN:
+ IF (imm8[6]) #IE; FI
+ POS_INF_TOKEN:
+ IF (imm8[7]) #IE; FI
+ ESAC
+ RETURN dest[31:0]
+}
+FOR j := 0 to 3
+ i := j*32
+ IF k[j]
+ dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+ ELSE
+ dst[i+31:i] := 0
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
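The FIXUPIMM entries above are a table-driven substitution: each element of "b" is classified into one of the eight tokens, the matching 4-bit nibble of the corresponding element of "c" is looked up, and that nibble selects one of sixteen canned responses. The intrinsic names are stripped from this hunk, so the sketch below assumes the unmasked 256-bit single-precision form is _mm256_fixupimm_ps (inferred from the description, element width, and the AVX512F/AVX512VL flags; compile with -mavx512f -mavx512vl):

    #include <immintrin.h>

    /* A minimal sketch, assuming the entry above is _mm256_fixupimm_ps.
       Nibble j of each control element answers "what should token j become?":
       response 1 keeps the (DAZ-adjusted) input, response 8 yields +0.
       Tokens 0,1,4,5 (QNaN, SNaN, -Inf, +Inf) -> 8; tokens 2,3,6,7 -> 1,
       i.e. control nibbles 1,1,8,8,1,1,8,8 = 0x11881188. */
    static inline __m256 zap_specials(__m256 b) {
        const __m256i ctrl = _mm256_set1_epi32(0x11881188);
        /* "a" (src1) only matters for response 0, which this table never
           selects; imm8 = 0 suppresses the optional #ZE/#IE reporting. */
        return _mm256_fixupimm_ps(_mm256_setzero_ps(), b, ctrl, 0);
    }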
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
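Since GETEXP returns the unbiased exponent itself as a floating-point value, it pairs naturally with GETMANT below for range reduction. A sketch, assuming the unmasked 256-bit double-precision entry above is _mm256_getexp_pd (the name is not visible in this hunk):

    #include <immintrin.h>

    /* Sketch (assumed name _mm256_getexp_pd): floor(log2(|x|)) per element,
       returned as a double rather than an integer. */
    static inline __m256d exponents_example(void) {
        __m256d x = _mm256_setr_pd(8.0, 0.5, 1.0, 6.0);
        return _mm256_getexp_pd(x);   /* {3.0, -1.0, 0.0, 2.0} */
    }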
+ + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
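GETMANT is the companion operation: it renormalizes each significand into the interval chosen by "interv" and applies the sign policy "sc". A sketch, assuming the unmasked 256-bit double-precision entry is _mm256_getmant_pd with the usual _MM_MANT_* enum constants:

    #include <immintrin.h>

    /* Sketch (assumed name _mm256_getmant_pd): pull each significand into
       [1,2), keeping the source sign, so x == m * 2^e with e from GETEXP. */
    static inline __m256d mantissas(__m256d x) {
        return _mm256_getmant_pd(x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
    }
    /* e.g. {12.0, -0.75, 1.0, 6.0} -> {1.5, -1.5, 1.0, 1.5} */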
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE (imm8[0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[255:0] := a[255:0] +CASE (imm8[0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +ESAC +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[255:0] := a[255:0] +CASE (imm8[0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +ESAC +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
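The INSERT entries overwrite one 128-bit half of a 256-bit vector, selected by imm8[0]. A sketch, assuming the unmasked float form above is _mm256_insertf32x4:

    #include <immintrin.h>

    /* Sketch (assumed name _mm256_insertf32x4): imm8 = 1 replaces bits
       255:128 of "a" with "b"; imm8 = 0 would replace bits 127:0. */
    static inline __m256 set_high_lane(__m256 a, __m128 b) {
        return _mm256_insertf32x4(a, b, 1);
    }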
+ + + + + + Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
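The BLEND entries are pure mask-driven selects: bit j of "k" picks b (set) or a (clear) for element j. A sketch, assuming the 256-bit 32-bit-integer form is _mm256_mask_blend_epi32:

    #include <immintrin.h>

    /* Sketch (assumed name _mm256_mask_blend_epi32): 0xAA = 0b10101010
       takes "b" in the odd positions and "a" in the even ones. */
    static inline __m256i take_b_odd(__m256i a, __m256i b) {
        return _mm256_mask_blend_epi32((__mmask8)0xAA, a, b);
    }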
+ + + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
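The BROADCAST entries splat the lowest element of a 128-bit source across the destination, with the mask deciding which lanes receive it. A sketch, assuming the zero-masked 256-bit form is _mm256_maskz_broadcastd_epi32:

    #include <immintrin.h>

    /* Sketch (assumed name _mm256_maskz_broadcastd_epi32): copies a[31:0]
       into every lane whose mask bit is set and zeroes the rest. */
    static inline __m256i splat_low(__m128i a, __mmask8 k) {
        return _mm256_maskz_broadcastd_epi32(k, a);
    }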
+ + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
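COMPRESS is the left-packing primitive behind branch-free stream filtering: the selected elements move, in order, to the low end of the destination, and the tail is either taken from "src" or zeroed. A sketch, assuming the zero-masked 256-bit form is _mm256_maskz_compress_epi32:

    #include <immintrin.h>

    /* Sketch (assumed name _mm256_maskz_compress_epi32): with
       keep = 0b10100101, elements 0, 2, 5 and 7 land in positions 0..3
       and positions 4..7 become zero. */
    static inline __m256i left_pack(__m256i v, __mmask8 keep) {
        return _mm256_maskz_compress_epi32(keep, v);
    }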
+ + + + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
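PERMUTEXVAR is a full cross-lane gather: element j of the result is element idx[j] of "a". A sketch, assuming the unmasked form above is _mm256_permutexvar_epi32 (note the index vector is the first argument):

    #include <immintrin.h>

    /* Sketch (assumed name _mm256_permutexvar_epi32): indices 7..0 reverse
       the eight 32-bit elements across the whole 256-bit register. */
    static inline __m256i reverse_epi32(__m256i a) {
        const __m256i rev = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        return _mm256_permutexvar_epi32(rev, a);
    }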
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+3]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+2]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
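PERMUTEX2VAR extends that to a 16-entry lookup over two source registers: per the pseudocode, the low index bits select the element and the next bit selects the table ("a" or "b"). A sketch, assuming the unmasked 256-bit form is _mm256_permutex2var_epi32:

    #include <immintrin.h>

    /* Sketch (assumed name _mm256_permutex2var_epi32): indices 0..7 read
       from "a", 8..15 from "b"; here we gather the even elements of each. */
    static inline __m256i evens(__m256i a, __m256i b) {
        const __m256i idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
        return _mm256_permutex2var_epi32(a, idx, b);
    }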
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := (idx[i+2]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).
+
+FOR j := 0 to 1
+ i := j*64
+ off := idx[i]*64
+ IF k[j]
+ dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off]
+ ELSE
+ dst[i+63:i] := idx[i+63:i]
+ FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := (idx[i+1]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
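The double-precision variants read 64-bit indices but use them the same way. A sketch, assuming the unmasked 256-bit form above is _mm256_permutex2var_pd:

    #include <immintrin.h>

    /* Sketch (assumed name _mm256_permutex2var_pd): idx bits 1:0 select the
       element, bit 2 the source, so {0, 4, 2, 6} yields {a0, b0, a2, b2}. */
    static inline __m256d mix_pd(__m256d a, __m256d b) {
        const __m256i idx = _mm256_setr_epi64x(0, 4, 2, 6);
        return _mm256_permutex2var_pd(a, idx, b);
    }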
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+3]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + off := idx[i+2:i]*32 + dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+2]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + off := idx[i+1:i]*32 + dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
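The 128-bit single-precision form uses two index bits plus one selector bit, which is enough for an arbitrary interleave of two four-element vectors. A sketch, assuming the unmasked form is _mm_permutex2var_ps:

    #include <immintrin.h>

    /* Sketch (assumed name _mm_permutex2var_ps): {0, 4, 1, 5} zips the low
       halves of "a" and "b" into {a0, b0, a1, b1}. */
    static inline __m128 zip_lo(__m128 a, __m128 b) {
        const __m128i idx = _mm_setr_epi32(0, 4, 1, 5);
        return _mm_permutex2var_ps(a, idx, b);
    }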
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := (idx[i+2]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + off := idx[i+1:i]*64 + dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + IF k[j] + dst[i+63:i] := (idx[i+1]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + off := idx[i]*64 + dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:128] := 0 + + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
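+<!-- Editorial usage sketch (assumed names, stripped from this rendering): the qword
+     entries above appear to be the _mm256_/_mm_ permutex2var_epi64 family. The
+     writemask variant below keeps the shuffled value where a mask bit is set and
+     copies the corresponding element of "a" otherwise:
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m256i a   = _mm256_setr_epi64x(10, 11, 12, 13);
+             __m256i b   = _mm256_setr_epi64x(20, 21, 22, 23);
+             __m256i idx = _mm256_setr_epi64x(7, 6, 5, 4); // bits[1:0] lane, bit 2 -> b
+             // k = 0x5: lanes 0 and 2 are shuffled, lanes 1 and 3 fall through from a
+             __m256i r = _mm256_mask_permutex2var_epi64(a, 0x5, idx, b);
+             long long out[4];
+             _mm256_storeu_si256((__m256i *)out, r); // out = {23, 11, 21, 13}
+             return 0;
+         }
+-->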
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
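+<!-- Editorial usage sketch (assumed name): the imm8-controlled in-lane double shuffles
+     above appear to be the permute_pd family; imm8 supplies one select bit per
+     element, choosing the low or high double within that element's 128-bit lane.
+     Zero-masked example:
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m256d a = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
+             // imm8 = 0b0101 swaps the pair inside each lane; k = 0x9 zeroes lanes 1, 2
+             __m256d r = _mm256_maskz_permute_pd(0x9, a, 0x5);
+             double out[4];
+             _mm256_storeu_pd(out, r); // out = {1.0, 0.0, 0.0, 2.0}
+             return 0;
+         }
+-->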
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) +tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) +tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) +tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) +tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) +tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) +tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
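+<!-- Editorial usage sketch (assumed name): the variable-control single-precision
+     shuffles above appear to be the masked permutevar_ps family; bits [1:0] of each
+     control element select a source lane. Writemask example, where masked-off lanes
+     are copied from "src":
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m128  src = _mm_set1_ps(-1.0f);
+             __m128  a   = _mm_setr_ps(10.0f, 11.0f, 12.0f, 13.0f);
+             __m128i b   = _mm_setr_epi32(3, 2, 1, 0); // reversal control
+             __m128  r   = _mm_mask_permutevar_ps(src, 0x6, a, b);
+             float out[4];
+             _mm_storeu_ps(out, r); // out = {-1, 12, 11, -1}
+             return 0;
+         }
+-->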
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + dst[i+63:i] := a[id+63:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
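+<!-- Editorial usage sketch (assumed name): the cross-lane imm8 double permutes above
+     appear to be _mm256_permutex_pd and its masked forms; imm8 packs four 2-bit
+     source indices. Reversing a vector:
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m256d a = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
+             __m256d r = _mm256_permutex_pd(a, 0x1B); // 0x1B encodes (3,2,1,0): reverse
+             double out[4];
+             _mm256_storeu_pd(out, r); // out = {3.0, 2.0, 1.0, 0.0}
+             return 0;
+         }
+-->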
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + id := idx[i+2:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". 

FOR j := 0 to 7
	i := j*32
	id := idx[i+2:i]*32
	dst[i+31:i] := a[id+31:id]
ENDFOR
dst[MAX:256] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
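+<!-- Editorial usage sketch (assumed name): the entry above appears to be
+     _mm256_permutexvar_ps; note that in the C API the index vector is the first
+     argument. Each idx element uses bits [2:0] to pick one of the eight lanes:
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m256  a   = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);
+             __m256i idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+             __m256  r   = _mm256_permutexvar_ps(idx, a); // full cross-lane reversal
+             float out[8];
+             _mm256_storeu_ps(out, r); // out = {7, 6, 5, 4, 3, 2, 1, 0}
+             return 0;
+         }
+-->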
+ + + + + + + Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 

DEFINE SELECT4(src, control) {
	CASE(control[1:0]) OF
	0:	tmp[63:0] := src[63:0]
	1:	tmp[63:0] := src[127:64]
	2:	tmp[63:0] := src[191:128]
	3:	tmp[63:0] := src[255:192]
	ESAC
	RETURN tmp[63:0]
}
tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
FOR j := 0 to 3
	i := j*64
	IF k[j]
		dst[i+63:i] := tmp_dst[i+63:i]
	ELSE
		dst[i+63:i] := src[i+63:i]
	FI
ENDFOR
dst[MAX:256] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + id := idx[i+1:i]*64 + dst[i+63:i] := a[id+63:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
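+<!-- Editorial usage sketch (assumed name): the qword permutes above appear to be the
+     permutex(var)_epi64 family. A vector index makes runtime-chosen shuffles easy,
+     e.g. broadcasting one element to every lane:
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m256i a   = _mm256_setr_epi64x(40, 41, 42, 43);
+             __m256i idx = _mm256_set1_epi64x(2);          // every lane reads a[2]
+             __m256i r   = _mm256_permutexvar_epi64(idx, a);
+             long long out[4];
+             _mm256_storeu_si256((__m256i *)out, r);       // out = {42, 42, 42, 42}
+             return 0;
+         }
+-->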
+ + + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
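+<!-- Editorial usage sketch (assumed name): the four "load contiguous active" dword
+     entries above match the expand family (e.g. _mm256_maskz_expand_epi32):
+     consecutive low elements of "a" are scattered to the positions whose mask bits
+     are set, the inverse of a compress/left-pack:
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m256i a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+             __m256i r = _mm256_maskz_expand_epi32(0xAA, a); // fill the odd lanes
+             int out[8];
+             _mm256_storeu_si256((__m256i *)out, r); // out = {0,1,0,2,0,3,0,4}
+             return 0;
+         }
+-->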
+ + + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
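+<!-- Editorial usage sketch (assumed name and control type): the imm8 dword shuffles
+     above appear to be the AVX-512 masked shuffle_epi32 variants; Intel declares the
+     control as _MM_PERM_ENUM, so a raw byte is cast below. 0x1B reverses the four
+     dwords of the lane:
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m128i a = _mm_setr_epi32(0, 1, 2, 3);
+             __m128i r = _mm_maskz_shuffle_epi32(0xE, a, (_MM_PERM_ENUM)0x1B);
+             int out[4];
+             _mm_storeu_si128((__m128i *)out, r); // out = {0, 2, 1, 0}: reversed, lane 0 zeroed
+             return 0;
+         }
+-->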
+ + + + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave 64-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave 64-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
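+<!-- Editorial usage sketch (assumed name): the interleave entries above appear to be
+     the masked unpackhi/unpacklo integer families; each operates independently within
+     every 128-bit lane. Zero-masked low-dword interleave:
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
+             __m256i b = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
+             // k = 0x0F keeps only the low 128-bit lane of the interleave
+             __m256i r = _mm256_maskz_unpacklo_epi32(0x0F, a, b);
+             int out[8];
+             _mm256_storeu_si256((__m256i *)out, r); // out = {0,10,1,11, 0,0,0,0}
+             return 0;
+         }
+-->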
+ + + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
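+<!-- Editorial usage sketch (assumed name): the RoundScaleFP64 entries above appear to
+     be the roundscale_pd family. imm8[7:4] is the number of fraction bits M to keep
+     and imm8[3:0] the rounding mode, so results snap to multiples of 2^-M:
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m256d a = _mm256_setr_pd(1.3, 2.8, -0.75, 5.06);
+             // M = 1 fraction bit, round to nearest: snap to multiples of 0.5
+             __m256d r = _mm256_roundscale_pd(a, (1 << 4) | _MM_FROUND_TO_NEAREST_INT);
+             double out[4];
+             _mm256_storeu_pd(out, r); // out = {1.5, 3.0, -1.0, 5.0}
+             return 0;
+         }
+-->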
+ + + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
	DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
	RETURN dst[63:0]
}
FOR j := 0 to 3
	i := j*64
	IF k[j]
		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
	ELSE
		dst[i+63:i] := src[i+63:i]
	FI
ENDFOR
dst[MAX:256] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
	DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
	RETURN dst[63:0]
}
FOR j := 0 to 3
	i := j*64
	IF k[j]
		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
	ELSE
		dst[i+63:i] := 0
	FI
ENDFOR
dst[MAX:256] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst". 
	DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
	RETURN dst[63:0]
}
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
ENDFOR
dst[MAX:256] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
	DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
	RETURN dst[63:0]
}
FOR j := 0 to 1
	i := j*64
	IF k[j]
		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
	ELSE
		dst[i+63:i] := src[i+63:i]
	FI
ENDFOR
dst[MAX:128] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
	DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
	RETURN dst[63:0]
}
FOR j := 0 to 1
	i := j*64
	IF k[j]
		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
	ELSE
		dst[i+63:i] := 0
	FI
ENDFOR
dst[MAX:128] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst". 
	DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
	RETURN dst[63:0]
}
FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
ENDFOR
dst[MAX:128] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
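+<!-- Editorial usage sketch (assumed name): the SCALE entries above appear to be the
+     scalef_pd family, computing a * 2^FLOOR(b) per element, i.e. a vectorized ldexp
+     whose exponent is taken as the floor of a floating-point operand:
+
+         #include <immintrin.h>
+
+         int main(void) {
+             __m256d a = _mm256_setr_pd(1.0, 1.5, -2.0, 3.0);
+             __m256d b = _mm256_setr_pd(3.0, 0.5, -1.0, 2.7);
+             __m256d r = _mm256_scalef_pd(a, b);
+             double out[4];
+             _mm256_storeu_pd(out, r); // out = {8.0, 1.5, -1.0, 12.0}
+             return 0;
+         }
+-->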
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
	DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
	RETURN dst[31:0]
}
FOR j := 0 to 7
	i := j*32
	IF k[j]
		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
	ELSE
		dst[i+31:i] := src[i+31:i]
	FI
ENDFOR
dst[MAX:256] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
	DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
	RETURN dst[31:0]
}
FOR j := 0 to 7
	i := j*32
	IF k[j]
		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
	ELSE
		dst[i+31:i] := 0
	FI
ENDFOR
dst[MAX:256] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst". 
	DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
	RETURN dst[31:0]
}
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
ENDFOR
dst[MAX:256] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
	DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
	RETURN dst[31:0]
}
FOR j := 0 to 3
	i := j*32
	IF k[j]
		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
	ELSE
		dst[i+31:i] := src[i+31:i]
	FI
ENDFOR
dst[MAX:128] := 0


  AVX512F
  AVX512VL
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
	RETURN dst[31:0]
}
FOR j := 0 to 3
	i := j*32
	IF k[j]
		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
	ELSE
		dst[i+31:i] := 0
	FI
ENDFOR
dst[MAX:128] := 0
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE SCALE(src1, src2) {
	IF (src2 == NaN)
		IF (src2 == SNaN)
			RETURN QNAN(src2)
		FI
	ELSE IF (src1 == NaN)
		IF (src1 == SNaN)
			RETURN QNAN(src1)
		FI
		IF (src2 != INF)
			RETURN QNAN(src1)
		FI
	ELSE
		tmp_src2 := src2
		tmp_src1 := src1
		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
			tmp_src2 := 0
		FI
		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
			tmp_src1 := 0
		FI
	FI
	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
	RETURN dst[31:0]
}
FOR j := 0 to 3
	i := j*32
	dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
ENDFOR
dst[MAX:128] := 0
+
+ AVX512F
+ AVX512VL
+
immintrin.h
+ Miscellaneous +
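The SCALEF entries above all compute, per element, a * 2^floor(b), with the SNaN/QNaN propagation and DAZ handling spelled out in SCALE(). The entries themselves carry no intrinsic names; assuming they correspond to the _mm_scalef_ps/_mm_scalef_pd family that immintrin.h declares for AVX512F + AVX512VL (names inferred from element widths and masks), a minimal C sketch, compiled with -mavx512f -mavx512vl:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 a = _mm_set_ps(8.0f, 6.0f, 4.0f, 2.0f);
        __m128 b = _mm_set_ps(3.9f, 2.0f, 1.0f, -1.0f);
        /* Per element: a * 2^floor(b); note 3.9 floors to 3. */
        __m128 r = _mm_scalef_ps(a, b);
        float out[4];
        _mm_storeu_ps(out, r);
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 8 24 64 */
        return 0;
    }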
+ + + + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +dst.m128[0] := a.m128[imm8[0]] +dst.m128[1] := b.m128[imm8[1]] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
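These three entries select whole 128-bit lanes: imm8 bit 0 picks which lane of "a" becomes the low lane of "dst", and imm8 bit 1 picks which lane of "b" becomes the high lane. Assuming they correspond to _mm256_shuffle_f32x4 and its mask/maskz forms (names inferred; the entries do not carry them), a sketch:

    #include <immintrin.h>

    /* Low lane of dst := high lane of a (imm8[0] = 1);
       high lane of dst := low lane of b (imm8[1] = 0). */
    __m256 cross_lanes(__m256 a, __m256 b) {
        return _mm256_shuffle_f32x4(a, b, 0x1);
    }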
+ + + + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +dst.m128[0] := a.m128[imm8[0]] +dst.m128[1] := b.m128[imm8[1]] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst". + +dst.m128[0] := a.m128[imm8[0]] +dst.m128[1] := b.m128[imm8[1]] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst.m128[0] := a.m128[imm8[0]] +tmp_dst.m128[1] := b.m128[imm8[1]] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst". + +dst.m128[0] := a.m128[imm8[0]] +dst.m128[1] := b.m128[imm8[1]] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
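The f64x2, i32x4, and i64x2 entries above repeat the same two-lane selection for other element types; only the element interpretation (and hence the mask width in the masked forms) changes. Assuming the 64-bit-integer trio maps to _mm256_shuffle_i64x2 and friends (names inferred), the zero-masked form can shuffle and clear in one step:

    #include <immintrin.h>

    /* dst lane 0 := a lane 1, dst lane 1 := b lane 1 (imm8 = 0b11);
       zeromask 0x3 then keeps only the two low 64-bit elements. */
    __m256i take_high_lane_low_half(__m256i a, __m256i b) {
        return _mm256_maskz_shuffle_i64x2(0x3, a, b, 0x3);
    }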
+ + + + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
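Unlike the x2/x4 shuffles, these operate on individual 64-bit elements within each 128-bit lane: each imm8 bit picks the low or high double of the corresponding source. Assuming the four entries are the mask/maskz forms of _mm256_shuffle_pd and _mm_shuffle_pd (names inferred), a sketch of the 256-bit writemask form:

    #include <immintrin.h>

    /* imm8 = 0b0101: per 128-bit lane, take a's high double then b's low
       double; mask 0x9 keeps results 0 and 3 and copies 1 and 2 from src. */
    __m256d shuffle_blend(__m256d src, __m256d a, __m256d b) {
        return _mm256_mask_shuffle_pd(src, 0x9, a, b, 0x5);
    }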
+ + + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
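SELECT4 reads two imm8 bits per result element: the two low results come from "a" and the two high results from "b" (per 128-bit lane in the 256-bit forms). Assuming these are the mask/maskz forms of _mm256_shuffle_ps/_mm_shuffle_ps (names inferred), the classic _MM_SHUFFLE macro builds imm8:

    #include <immintrin.h>

    /* Identity selection (a0, a1, b2, b3); the zeromask then clears the
       two elements sourced from b, leaving {a0, a1, 0, 0}. */
    __m128 keep_low_pair(__m128 a, __m128 b) {
        return _mm_maskz_shuffle_ps(0x3, a, b, _MM_SHUFFLE(3, 2, 1, 0));
    }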
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
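The unpackhi entries interleave the high halves of each 128-bit lane: per lane, {a_hi, b_hi} for doubles and {a2, b2, a3, b3} for floats. Assuming they are the mask/maskz forms of the unpackhi_pd/unpackhi_ps intrinsics (names inferred), a sketch:

    #include <immintrin.h>

    /* Per 128-bit lane: {a_hi, b_hi}; zeromask 0xF keeps all four doubles,
       so this matches the unmasked AVX unpackhi. */
    __m256d high_interleave(__m256d a, __m256d b) {
        return _mm256_maskz_unpackhi_pd(0xF, a, b);
    }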
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Miscellaneous +
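The unpacklo entries mirror this for the low halves: per lane, {a_lo, b_lo} for doubles or {a0, b0, a1, b1} for floats. Assuming the 128-bit float writemask form is _mm_mask_unpacklo_ps (name inferred), a sketch:

    #include <immintrin.h>

    /* dst = {a0, b0, a1, b1}, except element 3 is copied from src
       because writemask 0x7 leaves its bit clear. */
    __m128 low_interleave(__m128 src, __m128 a, __m128 b) {
        return _mm_mask_unpacklo_ps(src, 0x7, a, b);
    }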
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*64 + k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
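These comparisons return a compact bitmask rather than a vector of all-ones/all-zeros lanes: bit j of "k" holds the result for element j, and the 32 imm8 predicates are the familiar _CMP_* constants (ordered/unordered, signaling/quiet). Assuming the unmasked 256-bit entry is _mm256_cmp_pd_mask (name inferred), a sketch:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m256d a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0); /* elements 0..3 = 1,2,3,4 */
        __m256d b = _mm256_set_pd(1.0, 3.0, 5.0, 7.0); /* elements 0..3 = 7,5,3,1 */
        __mmask8 k = _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ);
        printf("0x%x\n", (unsigned)k); /* elements 0 and 1 compare less: 0x3 */
        return 0;
    }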
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*32 + k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
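The k1-masked variants gate the comparison itself: where a bit of "k1" is clear, the result bit is forced to zero. Assuming the 128-bit masked entry is _mm_mask_cmp_ps_mask (name inferred), a sketch:

    #include <immintrin.h>

    /* Bit j is set only where k1 allows it AND a[j] != b[j]. */
    __mmask8 masked_neq(__mmask8 k1, __m128 a, __m128 b) {
        return _mm_mask_cmp_ps_mask(k1, a, b, _CMP_NEQ_UQ);
    }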
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
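For the integer compares the entries come in pairs: a general imm8 form over the eight _MM_CMPINT_* predicates, plus named shorthands for each predicate. Assuming these are _mm256_cmp_epi32_mask and the _mm256_cmp{eq,ge,gt,le,lt,neq}_epi32_mask family (names inferred), the two spellings agree:

    #include <immintrin.h>
    #include <assert.h>

    void same_mask(__m256i a, __m256i b) {
        __mmask8 k1 = _mm256_cmplt_epi32_mask(a, b);
        __mmask8 k2 = _mm256_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
        assert(k1 == k2); /* the named form is the imm8 form specialized */
    }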
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
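The 64-bit compares follow the same scheme with 4- or 2-bit result masks. One idiomatic use of the k1-masked forms, assuming the names _mm256_cmple_epi64_mask and _mm256_mask_cmple_epi64_mask (inferred), is a branch-free range test:

    #include <immintrin.h>

    /* Bit j set iff lo[j] <= v[j] <= hi[j]: the second compare only
       reports where the first one succeeded. */
    __mmask8 in_range(__m256i v, __m256i lo, __m256i hi) {
        __mmask8 k = _mm256_cmple_epi64_mask(lo, v);
        return _mm256_mask_cmple_epi64_mask(k, v, hi);
    }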
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
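Because zeromask "k1" forces masked-off bits to zero in the result, one compare's output can feed the next compare's "k1" to AND two predicates without a separate mask instruction. A sketch, assuming the entries above are the usual _mm256_mask_cmp*_epu32_mask family:

    #include <immintrin.h>

    /* Sketch; names assumed. Returns a bit per lane with lo <= v <= hi
       (unsigned). Lanes zeroed by the first compare stay zero in the
       second because its zeromask "k1" never enables them. */
    __mmask8 in_range_epu32(__m256i v, unsigned lo, unsigned hi) {
        __mmask8 ge = _mm256_cmpge_epu32_mask(v, _mm256_set1_epi32((int)lo));
        return _mm256_mask_cmple_epu32_mask(ge, v, _mm256_set1_epi32((int)hi));
    }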
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 3 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 1 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
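The epu64 compares treat each lane as unsigned, which changes the answer exactly when the sign bit is set; this file documents the signed epi64 forms separately. A small C illustration with assumed names:

    #include <immintrin.h>

    /* Sketch; names assumed from the described operations. */
    __mmask8 unsigned_vs_signed(void) {
        __m128i a = _mm_set1_epi64x(-1);         /* 0xFFFFFFFFFFFFFFFF */
        __m128i b = _mm_set1_epi64x(1);
        __mmask8 u = _mm_cmpgt_epu64_mask(a, b); /* 0x3: unsigned max > 1 */
        __mmask8 s = _mm_cmpgt_epi64_mask(a, b); /* 0x0: -1 > 1 is false  */
        return (__mmask8)(u ^ s);                /* 0x3 */
    }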
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +&#13;
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 7 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +&#13;
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 3 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +&#13;
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 3 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +&#13;
immintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 1 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. + +FOR j := 0 to 7 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +&#13;
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 7 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. + +FOR j := 0 to 3 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +&#13;
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 3 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. + +FOR j := 0 to 3 + i := j*64 + IF k1[j] + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +&#13;
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 3 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:4] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. + +FOR j := 0 to 1 + i := j*64 + IF k1[j] + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +&#13;
immintrin.h
+ Compare +
+ + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 1 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:2] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Compare +
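The test/testn entries fuse a bitwise AND with a zero test, so checking a flag bit across lanes needs no separate compare against zero. A sketch assuming the conventional _mm256_test_epi32_mask name:

    #include <immintrin.h>

    /* Sketch; name assumed. Returns a mask bit per lane whose "bit"-th
       flag is set (bit must be < 32). testn would give the complement. */
    __mmask8 lanes_with_flag(__m256i flags, unsigned bit) {
        __m256i probe = _mm256_set1_epi32((int)(1u << bit));
        return _mm256_test_epi32_mask(flags, probe);
    }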
+ + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
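Compress-store writes only the selected lanes, packed contiguously from "base_addr", which makes it a single-intrinsic stream-compaction primitive. A sketch assuming the usual _mm256_mask_compressstoreu_pd name; the popcount builtin is GCC/Clang-specific:

    #include <immintrin.h>

    /* Sketch; name assumed. Stores the lanes of "a" selected by "k"
       contiguously to "out" (unaligned) and returns how many were written. */
    int compact_selected(double *out, __m256d a, __mmask8 k) {
        _mm256_mask_compressstoreu_pd(out, k, a);
        return __builtin_popcount((unsigned)k);
    }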
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
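Note that the stated alignment requirement applies to "mem_addr" itself, regardless of which lanes the writemask enables; masked-off destination elements are left untouched. A brief sketch with an assumed name:

    #include <immintrin.h>

    /* Sketch; name assumed. dst must be 32-byte aligned even though only
       lanes 0 and 2 (mask 0b0101) are written; lanes 1 and 3 keep their
       previous memory contents. */
    void store_even_lanes(double *dst, __m256d a) {
        _mm256_mask_store_pd(dst, (__mmask8)0x5, a);
    }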
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
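The unaligned masked stores are the standard way to write a partial final vector at the end of a loop without a scalar epilogue. A sketch assuming the conventional _mm256_mask_storeu_epi32 name:

    #include <immintrin.h>

    /* Sketch; name assumed. Writes the first n lanes of v (n <= 8);
       memory past dst[n-1] is never touched. */
    void store_first_n(int *dst, __m256i v, unsigned n) {
        __mmask8 k = (__mmask8)((1u << n) - 1u);  /* low n bits set */
        _mm256_mask_storeu_epi32(dst, k, v);
    }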
+ + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 7 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 3 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 3 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 1 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
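Across the scatter family, "scale" must be an immediate and the per-lane address is "base_addr" plus the lane's index times "scale". A sketch of the masked 32-bit case, assuming the usual _mm256_mask_i32scatter_epi32 name:

    #include <immintrin.h>

    /* Sketch; name assumed. Scatters vals[j] to table[idx[j]] for every
       lane whose mask bit is set; scale is 4 because the indices count
       int-sized slots. The scale argument must be a compile-time constant. */
    void scatter_valid(int *table, __m256i idx, __m256i vals, __mmask8 valid) {
        _mm256_mask_i32scatter_epi32(table, valid, idx, vals, 4);
    }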
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +&#13;
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F + AVX512VL +&#13;
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
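When 64-bit data is scattered through 32-bit indices (or 32-bit data through 64-bit indices), the index vector has a different width than the data vector, which is what the separate "i" and "m" strides in the pseudocode express. A sketch assuming the conventional _mm256_i32scatter_pd name:

    #include <immintrin.h>

    /* Sketch; name assumed. Four doubles are addressed by four 32-bit
       indices, so vindex is a 128-bit vector; scale 8 makes the indices
       count double-sized slots. */
    void scatter_doubles(double *base, __m128i idx, __m256d vals) {
        _mm256_i32scatter_pd(base, idx, vals, 8);
    }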
+ + + + + Store 256-bits (composed of 4 packed 64-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 8 packed 32-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 2 packed 64-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 4 packed 32-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 4 packed 64-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 8 packed 32-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 2 packed 64-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 4 packed 32-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512F + AVX512VL +
immintrin.h
+ Store +
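These whole-register stores mirror the older AVX _mm256_store_si256/_mm256_storeu_si256 pair but take a plain pointer, and only the aligned form carries the 32-byte requirement. A sketch with assumed names:

    #include <immintrin.h>

    /* Sketch; names assumed from the entries above. */
    void save_both(int *aligned32, int *anywhere, __m256i v) {
        _mm256_store_epi32(aligned32, v);  /* needs 32-byte alignment */
        _mm256_storeu_epi32(anywhere, v);  /* no alignment requirement */
    }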
+ + + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := src[m+63:m] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := src[m+63:m] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
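The writemask and zeromask conversion flavours differ only in where inactive destination lanes come from: "src" in the first case, zero in the second. A sketch with names assumed from the described operations:

    #include <immintrin.h>

    /* Sketch; names assumed. Widens four int32 lanes to four doubles. */
    void int_to_double(__m128i a, __m256d src, __mmask8 k,
                       __m256d *merged, __m256d *zeroed) {
        *merged = _mm256_mask_cvtepi32_pd(src, k, a);  /* inactive <- src */
        *zeroed = _mm256_maskz_cvtepi32_pd(k, a);      /* inactive <- 0   */
    }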
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
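Note that the 256-bit source narrows into a 128-bit integer result (four f64 lanes become four i32 lanes), which is why the pseudocode ends with dst[MAX:128] := 0. A hedged sketch, assuming the conventional immintrin.h name _mm256_mask_cvtpd_epi32:

#include <immintrin.h>

/* Four f64 lanes narrow to four i32 lanes in an xmm result;
   masked-off lanes are copied from src. */
__m128i f64_to_i32_keep(__m128i src, __mmask8 k, __m256d a) {
    return _mm256_mask_cvtpd_epi32(src, k, a);
}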
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
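For illustration, a sketch of the zeromask f64-to-f32 demotion, assuming the usual name _mm256_maskz_cvtpd_ps:

#include <immintrin.h>

/* Masked-off f32 lanes are zeroed rather than copied from a src operand. */
__m128 f64_to_f32_zero(__mmask8 k, __m256d a) {
    return _mm256_maskz_cvtpd_ps(k, a);
}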
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
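A sketch of the unmasked unsigned conversion, assuming the usual name _mm256_cvtpd_epu32; the name is an assumption, and per the SDM inputs outside the unsigned 32-bit range produce the all-ones integer indefinite value:

#include <immintrin.h>

__m128i f64_to_u32(__m256d a) {
    /* Four f64 lanes convert to four u32 lanes in an xmm result. */
    return _mm256_cvtpd_epu32(a);
}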
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
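These are the VL counterparts of the half-to-single widening. A sketch assuming the usual name _mm256_mask_cvtph_ps:

#include <immintrin.h>

/* Eight f16 values, held as raw 16-bit patterns in a, widen to eight
   f32 lanes; lanes with a clear k-bit keep the value from src. */
__m256 f16_to_f32_keep(__m256 src, __mmask8 k, __m128i a) {
    return _mm256_mask_cvtph_ps(src, k, a);
}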
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
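Unlike the _Truncate entries further below, this form rounds according to the current MXCSR rounding mode. A sketch assuming the usual name _mm256_mask_cvtps_epi32:

#include <immintrin.h>

__m256i f32_to_i32_keep(__m256i src, __mmask8 k, __m256 a) {
    return _mm256_mask_cvtps_epi32(src, k, a);
}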
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 7 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 7 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 7 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 7 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 3 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 3 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 3 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +FOR j := 0 to 3 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
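The [round_imm_note] placeholder refers to the rounding immediate these entries take. A sketch assuming the usual name _mm256_mask_cvtps_ph and the standard _MM_FROUND_* constants:

#include <immintrin.h>

/* Round to nearest even, suppressing precision exceptions; the
   immediate must be a compile-time constant. */
__m128i f32_to_f16_keep(__m128i src, __mmask8 k, __m256 a) {
    return _mm256_mask_cvtps_ph(src, k, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}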
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
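A sketch of the unmasked form, assuming the usual name _mm256_cvtps_epu32:

#include <immintrin.h>

__m256i f32_to_u32(__m256 a) {
    /* Negative inputs are out of range for an unsigned result. */
    return _mm256_cvtps_epu32(a);
}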
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
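The _Truncate forms round toward zero regardless of MXCSR. A sketch assuming the usual name _mm256_mask_cvttpd_epi32 (note the double "t"):

#include <immintrin.h>

__m128i f64_to_i32_trunc_keep(__m128i src, __mmask8 k, __m256d a) {
    return _mm256_mask_cvttpd_epi32(src, k, a);
}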
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
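A sketch of the unmasked truncating unsigned form, assuming the usual name _mm256_cvttpd_epu32:

#include <immintrin.h>

__m128i f64_to_u32_trunc(__m256d a) {
    return _mm256_cvttpd_epu32(a);
}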
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
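A sketch of the zeromask truncating form, assuming the usual name _mm256_maskz_cvttps_epi32:

#include <immintrin.h>

/* Masked-off i32 lanes become 0. */
__m256i f32_to_i32_trunc_zero(__mmask8 k, __m256 a) {
    return _mm256_maskz_cvttps_epi32(k, a);
}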
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
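A sketch of the unmasked form of this single-precision truncating unsigned conversion, assuming the usual name _mm256_cvttps_epu32:

#include <immintrin.h>

__m256i f32_to_u32_trunc(__m256 a) {
    return _mm256_cvttps_epu32(a);
}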
+ + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
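Since every unsigned 32-bit integer is exactly representable in an f64, this widening is lossless. A sketch assuming the usual name _mm256_cvtepu32_pd:

#include <immintrin.h>

__m256d u32_to_f64(__m128i a) {
    return _mm256_cvtepu32_pd(a);
}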
+ + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+31:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
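The register and store forms above differ in where the truncated bytes land. A sketch assuming the usual names _mm256_cvtepi32_epi8 and _mm256_mask_cvtepi32_storeu_epi8:

#include <immintrin.h>

/* Register form: eight i32 lanes truncate to eight bytes in the low
   64 bits of the result; the upper bits are zeroed (dst[MAX:64] := 0). */
__m128i i32_to_i8(__m256i a) {
    return _mm256_cvtepi32_epi8(a);
}

/* Store form: only bytes whose mask bit is set are written to memory,
   which may be unaligned. */
void i32_to_i8_store(void *base_addr, __mmask8 k, __m256i a) {
    _mm256_mask_cvtepi32_storeu_epi8(base_addr, k, a);
}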
+ + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 32*j + dst[k+31:k] := Truncate32(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[k+31:k] := Truncate32(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
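A sketch of the unmasked 64-to-32-bit truncation, assuming the usual name _mm256_cvtepi64_epi32:

#include <immintrin.h>

/* Four i64 lanes truncate to four i32 lanes (low halves only). */
__m128i i64_to_i32(__m256i a) {
    return _mm256_cvtepi64_epi32(a);
}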
+ + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+63:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+31:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
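The Saturate8 entries differ from the Truncate8 entries earlier: out-of-range values clamp to INT8_MIN/INT8_MAX instead of keeping only the low byte. A sketch assuming the usual name _mm256_cvtsepi32_epi8:

#include <immintrin.h>

/* For example, 300 saturates to 127 and -300 saturates to -128. */
__m128i i32_to_i8_sat(__m256i a) {
    return _mm256_cvtsepi32_epi8(a);
}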
+ + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+63:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+63:i]) +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 32*j + dst[k+31:k] := Saturate32(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[k+31:k] := Saturate32(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
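A sketch of the unmasked saturating 64-to-32-bit narrowing, assuming the usual name _mm256_cvtsepi64_epi32:

#include <immintrin.h>

/* Values outside the i32 range clamp to INT32_MIN/INT32_MAX. */
__m128i i64_to_i32_sat(__m256i a) {
    return _mm256_cvtsepi64_epi32(a);
}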
+ + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+63:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
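+ The merge-masked sign extension above keeps src lanes where the mask bit is clear; a short sketch (intrinsic name _mm256_mask_cvtepi8_epi32 assumed):
+
+#include <immintrin.h>
+
+int main(void) {
+    __m128i bytes = _mm_set1_epi8(-5);      /* only the low 8 bytes are read */
+    __m256i src   = _mm256_set1_epi32(42);  /* fallback for inactive lanes */
+    /* Lanes 0-3 become SignExtend32(-5) = -5; lanes 4-7 keep 42 from src. */
+    __m256i r = _mm256_mask_cvtepi8_epi32(src, 0x0F, bytes);
+    (void)r;
+    return 0;
+}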
+ Convert +
+ + + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
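+ For contrast with the merge-masked forms, the zero-masked 32-to-64-bit sign extension zeroes inactive lanes instead (assumed name _mm_maskz_cvtepi32_epi64):
+
+#include <immintrin.h>
+
+int main(void) {
+    __m128i a = _mm_set_epi32(0, 0, -2, 1); /* only the low two lanes are used */
+    /* Mask 0b01: lane 0 becomes SignExtend64(1), lane 1 is zeroed. */
+    __m128i r = _mm_maskz_cvtepi32_epi64(0x1, a);
+    (void)r;
+    return 0;
+}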
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*16 + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + l := j*16 + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
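+ The unmasked unsigned saturating narrow above clamps anything over 255; a sketch (assumed name _mm256_cvtusepi32_epi8, which packs the eight results into the low 64 bits of a __m128i):
+
+#include <immintrin.h>
+#include <stdio.h>
+
+int main(void) {
+    __m256i a = _mm256_set_epi32(7, 6, 5, 4, 3, 300, 1, 0);
+    __m128i r = _mm256_cvtusepi32_epi8(a);
+    unsigned char out[16];
+    _mm_storeu_si128((__m128i *)out, r);
+    printf("%u %u %u\n", out[0], out[1], out[2]);  /* 0 1 255 (300 saturated) */
+    return 0;
+}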
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+31:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+31:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+63:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+63:i]) +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:16] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 32*j + dst[k+31:k] := SaturateU32(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[k+31:k] := SaturateU32(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := 64*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+63:i]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i]) + FI +ENDFOR + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
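+ Zero extension differs from the sign-extension entries earlier in that 0xFF becomes 255 rather than -1; a zero-masked sketch (assumed name _mm256_maskz_cvtepu8_epi32):
+
+#include <immintrin.h>
+
+int main(void) {
+    __m128i bytes = _mm_set1_epi8((char)0xFF); /* low 8 bytes are read */
+    /* All eight mask bits set: every lane becomes ZeroExtend32(0xFF) = 255. */
+    __m256i r = _mm256_maskz_cvtepu8_epi32(0xFF, bytes);
+    (void)r;
+    return 0;
+}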
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
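+ Expand loads read mask-count elements contiguously from memory and scatter them into the active lanes, unlike the plain masked loads later in this section, which read each lane from its own offset. A sketch (assumed name _mm256_mask_expandloadu_pd):
+
+#include <immintrin.h>
+#include <stdio.h>
+
+int main(void) {
+    double mem[2] = {1.5, 2.5};             /* only two values are read */
+    __m256d src = _mm256_set1_pd(-1.0);
+    /* Mask 0b1010: mem[0] lands in lane 1, mem[1] in lane 3; lanes 0 and 2
+       keep -1.0 from src. */
+    __m256d r = _mm256_mask_expandloadu_pd(src, 0xA, mem);
+    double out[4];
+    _mm256_storeu_pd(out, r);
+    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* -1 1.5 -1 2.5 */
+    return 0;
+}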
+ Load +
+ + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
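+ A gather sketch for the 256-bit, 32-bit-index form above (assumed name _mm256_mmask_i32gather_pd; "scale" must be a compile-time constant of 1, 2, 4 or 8, here 8 = sizeof(double)):
+
+#include <immintrin.h>
+#include <stdio.h>
+
+int main(void) {
+    double table[8] = {0, 10, 20, 30, 40, 50, 60, 70};
+    __m128i idx = _mm_set_epi32(1, 3, 5, 7);  /* lanes, low to high: 7,5,3,1 */
+    __m256d src = _mm256_set1_pd(-1.0);
+    /* Mask 0b0111: gather lanes 0-2, keep src in lane 3. */
+    __m256d r = _mm256_mmask_i32gather_pd(src, 0x7, idx, table, 8);
+    double out[4];
+    _mm256_storeu_pd(out, r);
+    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 70 50 30 -1 */
+    return 0;
+}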
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
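+ The aligned masked load above requires a 32-byte boundary; a merge-masked sketch (assumed name _mm256_mask_load_pd; note the _Alignas on the buffer):
+
+#include <immintrin.h>
+#include <stdio.h>
+
+int main(void) {
+    static _Alignas(32) double buf[4] = {1, 2, 3, 4};
+    __m256d src = _mm256_set1_pd(0.0);
+    /* Mask 0b0011: lanes 0 and 1 come from buf, lanes 2 and 3 from src. */
+    __m256d r = _mm256_mask_load_pd(src, 0x3, buf);
+    double out[4];
+    _mm256_storeu_pd(out, r);
+    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 2 0 0 */
+    return 0;
+}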
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
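+ Unlike the aligned forms above, the loadu entries tolerate any address; a merge-masked sketch (assumed name _mm256_mask_loadu_epi32):
+
+#include <immintrin.h>
+
+int main(void) {
+    int mem[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+    __m256i src = _mm256_set1_epi32(-1);
+    /* Deliberately misaligned start (&mem[1]); mask 0x0F loads lanes 0-3,
+       lanes 4-7 keep -1 from src. */
+    __m256i r = _mm256_mask_loadu_epi32(src, 0x0F, &mem[1]);
+    (void)r;
+    return 0;
+}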
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
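For reference, a minimal C sketch of the merge- and zero-masked unaligned loads above. The intrinsic names (`_mm256_mask_loadu_pd`/`_mm256_maskz_loadu_pd`) are inferred from the descriptions, since the XML name attributes are not visible in this hunk; compile with `-mavx512f -mavx512vl`:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        double mem[4] = {1.0, 2.0, 3.0, 4.0};
        __m256d src = _mm256_set1_pd(-1.0);
        double out[4];
        /* k = 0b0101: lanes 0 and 2 come from memory, the rest from src... */
        _mm256_storeu_pd(out, _mm256_mask_loadu_pd(src, 0x5, mem));
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 -1 3 -1 */
        /* ...and with maskz the inactive lanes are zeroed instead. */
        _mm256_storeu_pd(out, _mm256_maskz_loadu_pd(0x5, mem));
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 0 3 0 */
        return 0;
    }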
+ + + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
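A sketch of how these expanding loads differ from the plain masked loads above, assuming the usual `_mm256_maskz_expandloadu_epi32` name for the 256-bit zero-masked form:

    #include <immintrin.h>

    /* k = 0xA5 = 0b10100101: the four ints at mem[0..3] are read
       contiguously and scattered into lanes 0, 2, 5 and 7; the other
       lanes are zeroed. A masked load would instead read each active
       lane from its own offset (mem[0], mem[2], mem[5], mem[7]). */
    __m256i expand_demo(const int *mem) {
        return _mm256_maskz_expandloadu_epi32(0xA5, mem);
    }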
+ + + + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 3 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 1 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
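A usage sketch for the masked gathers, assuming the AVX512VL naming `_mm256_mmask_i32gather_epi32` (note the `mmask` infix, unlike the AVX2 `mask_i32gather` forms):

    #include <immintrin.h>

    /* Gathers table[idx[j]] for every set bit of k; inactive lanes keep
       their value from src. scale must be an immediate 1, 2, 4 or 8 --
       here 4, because the gathered elements are 32-bit ints. */
    __m256i gather_demo(const int *table, __m256i idx,
                        __mmask8 k, __m256i src) {
        return _mm256_mmask_i32gather_epi32(src, k, idx, table, 4);
    }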
+ + + + Load 256-bits (composed of 4 packed 64-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 8 packed 32-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 2 packed 64-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 4 packed 32-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 4 packed 64-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 8 packed 32-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 2 packed 64-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 4 packed 32-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Load +
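These four entries presumably correspond to the element-typed `_mm256_loadu_epi32`-style names, newer aliases of the classic `_mm256_loadu_si256`/`_mm256_load_si256` (recent compilers are required for them). A small sketch:

    #include <immintrin.h>

    /* p does not need to be aligned for the loadu form; the load form
       would require a 32-byte boundary. */
    __m256i sum_first_16(const int *p) {
        __m256i lo = _mm256_loadu_epi32(p);
        __m256i hi = _mm256_loadu_epi32(p + 8);
        return _mm256_add_epi32(lo, hi);
    }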
+ + + + + + Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
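These masked moves are the mask-driven blend primitive underlying the other masked operations; assuming the usual `_mm256_mask_mov_pd` name, a per-lane select can be written as:

    #include <immintrin.h>

    /* lane j of the result is on_true[j] if k[j] is set, else on_false[j],
       matching dst := k ? a : src in the pseudocode above. */
    __m256d select_pd(__mmask8 k, __m256d on_true, __m256d on_false) {
        return _mm256_mask_mov_pd(on_false, k, on_true);
    }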
+ + + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +tmp[191:128] := a[191:128] +tmp[255:192] := a[191:128] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +tmp[191:128] := a[191:128] +tmp[255:192] := a[191:128] +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
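With an all-ones mask, the masked `movedup` forms reduce to the plain AVX duplication; a sketch assuming the `_mm256_maskz_movedup_pd` name:

    #include <immintrin.h>

    /* {a0, a1, a2, a3} -> {a0, a0, a2, a2}; k = 0xF keeps every lane. */
    __m256d dup_even(__m256d a) {
        return _mm256_maskz_movedup_pd(0xF, a);
    }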
+ + + + + + Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
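The integer forms are handy for zeroing unselected lanes; a sketch assuming the `_mm256_maskz_mov_epi32` name:

    #include <immintrin.h>

    /* Keeps only the lanes of a selected by k, zeroing the rest. */
    __m256i keep_lanes(__m256i a, __mmask8 k) {
        return _mm256_maskz_mov_epi32(k, a);
    }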
+ + + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +tmp[159:128] := a[191:160] +tmp[191:160] := a[191:160] +tmp[223:192] := a[255:224] +tmp[255:224] := a[255:224] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +tmp[159:128] := a[191:160] +tmp[191:160] := a[191:160] +tmp[223:192] := a[255:224] +tmp[255:224] := a[255:224] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +tmp[159:128] := a[159:128] +tmp[191:160] := a[159:128] +tmp[223:192] := a[223:192] +tmp[255:224] := a[223:192] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +tmp[159:128] := a[159:128] +tmp[191:160] := a[159:128] +tmp[223:192] := a[223:192] +tmp[255:224] := a[223:192] +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Move +
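Combining the odd- and even-indexed duplications above gives the classic adjacent-pair sum; a sketch assuming the `_mm256_maskz_moveldup_ps`/`_mm256_maskz_movehdup_ps` names:

    #include <immintrin.h>

    /* even = {a0,a0,a2,a2,...}, odd = {a1,a1,a3,a3,...}, so each pair sum
       a(2i)+a(2i+1) appears in both lanes of its pair; k = 0xFF keeps all. */
    __m256 pair_sums(__m256 a) {
        __m256 even = _mm256_maskz_moveldup_ps(0xFF, a);
        __m256 odd  = _mm256_maskz_movehdup_ps(0xFF, a);
        return _mm256_add_ps(even, odd);
    }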
+ + + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] AND b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] AND b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
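A sketch of the masked bitwise family, assuming the `_mm256_maskz_and_epi32`/`_mm256_maskz_andnot_epi32`/`_mm256_maskz_or_epi32` names; note that the andnot convention above complements the *first* operand, dst := (NOT a) AND b:

    #include <immintrin.h>

    /* (a AND b) OR ((NOT a) AND b) == b on active lanes, 0 elsewhere --
       a round-trip that exercises all three masked operations. */
    __m256i masked_bits(__m256i a, __m256i b, __mmask8 k) {
        __m256i both   = _mm256_maskz_and_epi32(k, a, b);
        __m256i only_b = _mm256_maskz_andnot_epi32(k, a, b);
        return _mm256_maskz_or_epi32(k, both, only_b);
    }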
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "a" when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*32 + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "a" when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*32 + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "a" when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 3 + i := j*64 + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "a" when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 1 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 1 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 1 + i := j*64 + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
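The imm8 of the ternary-logic entries is simply the eight-entry truth table of the three inputs: the output bit is bit (a<<2)|(b<<1)|c of imm8. For example, 0xE8 has bits 3, 5, 6 and 7 set, which is the majority function; a sketch assuming the `_mm256_ternarylogic_epi32` name:

    #include <immintrin.h>

    /* MAJ(a,b,c) in one instruction instead of (a&b)|(a&c)|(b&c);
       imm8 must be a compile-time constant. */
    __m256i majority(__m256i a, __m256i b, __m256i c) {
        return _mm256_ternarylogic_epi32(a, b, c, 0xE8);
    }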
+ + + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Logical +
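The unmasked forms mirror the AVX2 `_mm256_xor_si256`/`_mm256_or_si256` but are element-typed, so they pair naturally with the mask_/maskz_ variants above; a sketch assuming the `_mm256_xor_epi64` name:

    #include <immintrin.h>

    /* Flip the bits of v selected by bits, one 64-bit lane at a time. */
    __m256i toggle(__m256i v, __m256i bits) {
        return _mm256_xor_epi64(v, bits);
    }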
+ + + + + + Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + + Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + + Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + + Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
+ + + + + Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Set +
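A sketch of the masked broadcasts, assuming the `_mm256_mask_set1_epi32` name:

    #include <immintrin.h>

    /* Overwrite the lanes of src selected by k with a scalar value,
       e.g. to patch flagged elements in place. */
    __m256i patch_lanes(__m256i src, __mmask8 k, int value) {
        return _mm256_mask_set1_epi32(src, k, value);
    }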
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
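The 64-bit left-rotate entries above add writemask and zeromask forms; in the writemask form, unselected lanes copy "src". A sketch, assuming the 256-bit writemask entry is _mm256_mask_rol_epi64:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i src = _mm256_set1_epi64x(-1);       /* fallback for masked-off lanes */
    __m256i a   = _mm256_setr_epi64x(1, 2, 3, 4);
    /* writemask form: lanes 0..1 are rotated, lanes 2..3 copy src */
    __m256i r = _mm256_mask_rol_epi64(src, 0x3, a, 63);
    unsigned long long out[4];
    _mm256_storeu_si256((__m256i *)out, r);
    printf("%llx %llx %llx %llx\n", out[0], out[1], out[2], out[3]);
    /* prints: 8000000000000000 1 ffffffffffffffff ffffffffffffffff */
    return 0;
}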
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
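These variable-count entries take the rotate amount per lane from "b" instead of an immediate. A sketch, assuming the unmasked 256-bit form is _mm256_rolv_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi32(0x12345678);
    /* per-lane counts; 36 is reduced modulo 32 and behaves like 4 */
    __m256i n = _mm256_setr_epi32(0, 4, 8, 12, 16, 20, 24, 36);
    __m256i r = _mm256_rolv_epi32(a, n);
    unsigned int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    printf("0x%08x 0x%08x 0x%08x\n", out[0], out[1], out[7]);
    /* prints: 0x12345678 0x23456781 0x23456781 */
    return 0;
}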
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 7 i := j*32 IF k[j] dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) ELSE dst[i+31:i] := src[i+31:i] FI ENDFOR dst[MAX:256] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 7 i := j*32 IF k[j] dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) ELSE dst[i+31:i] := 0 FI ENDFOR dst[MAX:256] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 7 i := j*32 dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) ENDFOR dst[MAX:256] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 3 i := j*32 IF k[j] dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) ELSE dst[i+31:i] := src[i+31:i] FI ENDFOR dst[MAX:128] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 3 i := j*32 IF k[j] dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) ELSE dst[i+31:i] := 0 FI ENDFOR dst[MAX:128] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 3 i := j*32 dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) ENDFOR dst[MAX:128] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
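Right rotation mirrors the left-rotate entries earlier: a right rotate by n bits equals a left rotate by 32 - n. A sketch under the assumption that these entries map to _mm256_ror_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a  = _mm256_set1_epi32(0x00000018);
    __m256i r1 = _mm256_ror_epi32(a, 4);
    __m256i r2 = _mm256_rol_epi32(a, 28); /* same result: 32 - 4 = 28 */
    unsigned int o1[8], o2[8];
    _mm256_storeu_si256((__m256i *)o1, r1);
    _mm256_storeu_si256((__m256i *)o2, r2);
    printf("0x%08x 0x%08x\n", o1[0], o2[0]); /* both print 0x80000001 */
    return 0;
}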
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 7 i := j*32 IF k[j] dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) ELSE dst[i+31:i] := src[i+31:i] FI ENDFOR dst[MAX:256] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 7 i := j*32 IF k[j] dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) ELSE dst[i+31:i] := 0 FI ENDFOR dst[MAX:256] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 7 i := j*32 dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) ENDFOR dst[MAX:256] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 3 i := j*32 IF k[j] dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) ELSE dst[i+31:i] := src[i+31:i] FI ENDFOR dst[MAX:128] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 3 i := j*32 IF k[j] dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) ELSE dst[i+31:i] := 0 FI ENDFOR dst[MAX:128] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". +DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { count := count_src % 32 RETURN (src >> count) OR (src << (32 - count)) } FOR j := 0 to 3 i := j*32 dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) ENDFOR dst[MAX:128] := 0 + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
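The variable-count right rotates above likewise reduce each lane's count modulo the lane width. A sketch of the unmasked 128-bit 64-bit form, assuming it maps to _mm_rorv_epi64:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi64x(1);
    /* counts are reduced modulo 64, so 65 behaves like 1;
     * note _mm_set_epi64x takes (high, low) */
    __m128i n = _mm_set_epi64x(65, 1);
    __m128i r = _mm_rorv_epi64(a, n);
    unsigned long long out[2];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%llx %llx\n", out[0], out[1]); /* both 8000000000000000 */
    return 0;
}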
+ + + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
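Unlike the rotates, these shifts do not reduce the count modulo the lane width: the pseudocode zeroes any lane whose count exceeds 31 (or 63). A sketch, assuming the 256-bit forms are _mm256_maskz_slli_epi32 and _mm256_mask_sll_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi32(1);
    /* imm8 > 31: every selected lane becomes 0 instead of wrapping */
    __m256i r1 = _mm256_maskz_slli_epi32(0xFF, a, 32);
    /* the "count" forms take the shift amount from the low 64 bits of
     * an __m128i; lanes 4..7 are masked off and copy src (= a here) */
    __m128i c  = _mm_cvtsi32_si128(4);
    __m256i r2 = _mm256_mask_sll_epi32(a, 0x0F, a, c);
    unsigned int o1[8], o2[8];
    _mm256_storeu_si256((__m256i *)o1, r1);
    _mm256_storeu_si256((__m256i *)o2, r2);
    printf("%u %u %u\n", o1[0], o2[0], o2[7]); /* prints: 0 16 1 */
    return 0;
}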
+ + + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
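The per-element shift entries use a count vector, with each lane shifted by its own amount. A sketch, assuming the 256-bit zeromask form is _mm256_maskz_sllv_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi32(1);
    __m256i n = _mm256_setr_epi32(0, 1, 2, 3, 31, 32, 33, 100);
    /* counts >= 32 zero the lane rather than wrapping as rotates do */
    __m256i r = _mm256_maskz_sllv_epi32(0xFF, a, n);
    unsigned int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    printf("%u %u %u %u\n", out[0], out[4], out[5], out[7]);
    /* prints: 1 2147483648 0 0 */
    return 0;
}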
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
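The arithmetic right shifts fill with copies of the sign bit, and a count above the lane width yields all sign bits (0 or -1) rather than 0. The 64-bit arithmetic shifts have no SSE/AVX2 counterpart and first appear with AVX512F/AVX512VL. A sketch, assuming the unmasked 256-bit immediate form is _mm256_srai_epi64:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a  = _mm256_set1_epi64x(-16);
    __m256i r1 = _mm256_srai_epi64(a, 2);  /* sign-filled: -16 >> 2 == -4 */
    __m256i r2 = _mm256_srai_epi64(a, 64); /* imm8 > 63: all sign bits */
    long long o1[4], o2[4];
    _mm256_storeu_si256((__m256i *)o1, r1);
    _mm256_storeu_si256((__m256i *)o2, r2);
    printf("%lld %lld\n", o1[0], o2[0]); /* prints: -4 -1 */
    return 0;
}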
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
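The per-element arithmetic shifts combine both behaviors above: per-lane counts, sign fill, and saturation at the lane width. A sketch, assuming the unmasked 256-bit 64-bit form is _mm256_srav_epi64:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_setr_epi64x(-8, -8, 8, 8);
    __m256i n = _mm256_setr_epi64x(1, 64, 1, 64);
    __m256i r = _mm256_srav_epi64(a, n);
    long long out[4];
    _mm256_storeu_si256((__m256i *)out, r);
    printf("%lld %lld %lld %lld\n", out[0], out[1], out[2], out[3]);
    /* prints: -4 -1 4 0  (counts >= 64 produce the sign fill) */
    return 0;
}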
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
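The writemask ("src") variants above merge instead of zeroing. A sketch of that behaviour, assuming the entry corresponds to _mm_mask_srli_epi64 (name not shown in this hunk):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i src = _mm_set_epi64x(-1, -1);   /* fallback values */
    __m128i a   = _mm_set_epi64x(256, 64);  /* lane 1 = 256, lane 0 = 64 */
    /* mask 0b01: lane 0 is shifted, lane 1 is merged from src */
    __m128i r = _mm_mask_srli_epi64(src, 0x1, a, 4);
    long long out[2];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%lld %lld\n", out[0], out[1]); /* 4 -1 */
    return 0;
}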
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Shift +
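Note that the variable-shift entries above guard with "count < 32" per lane (any lane whose count is the element width or more goes to zero), unlike the uniform-count entries, which test "count > 31" once. A sketch assuming the 256-bit zeromask form is _mm256_maskz_srlv_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a     = _mm256_set1_epi32(0x80);             /* 128 in every lane */
    __m256i count = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 31, 32);
    __m256i r = _mm256_maskz_srlv_epi32(0xFF, a, count); /* all lanes enabled */
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int j = 0; j < 8; j++)
        printf("%d ", out[j]); /* 128 64 32 16 8 4 0 0 (count 32 zeroes the lane) */
    printf("\n");
    return 0;
}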
+ + + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F + AVX512VL +
immintrin.h
+ Elementary Math Functions +
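A short sketch of the masked square-root entries above, assuming the 256-bit zeromask form is _mm256_maskz_sqrt_pd (AVX512F + AVX512VL):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_setr_pd(4.0, 9.0, 16.0, 25.0);
    /* mask 0b1010: lanes 1 and 3 get the square root, lanes 0 and 2 are zeroed */
    __m256d r = _mm256_maskz_sqrt_pd(0xA, a);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 3 0 5 */
    return 0;
}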
+ + + + + + + Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst".
	FOR j := 0 to 3
	i := j*128
	a[i+127:i] := ShiftRows(a[i+127:i])
	a[i+127:i] := SubBytes(a[i+127:i])
	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
ENDFOR
dst[MAX:512] := 0
+
+
+ AVX512F
+ VAES
+
immintrin.h
+ Cryptography +
+ + + + + Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst".
	FOR j := 0 to 3
	i := j*128
	a[i+127:i] := ShiftRows(a[i+127:i])
	a[i+127:i] := SubBytes(a[i+127:i])
	a[i+127:i] := MixColumns(a[i+127:i])
	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
ENDFOR
dst[MAX:512] := 0
+
+
+ AVX512F
+ VAES
+
immintrin.h
+ Cryptography +
+ + + + + Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". + FOR j := 0 to 3 + i := j*128 + a[i+127:i] := InvShiftRows(a[i+127:i]) + a[i+127:i] := InvSubBytes(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + VAES +
immintrin.h
+ Cryptography +
+ + + + + Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". + FOR j := 0 to 3 + i := j*128 + a[i+127:i] := InvShiftRows(a[i+127:i]) + a[i+127:i] := InvSubBytes(a[i+127:i]) + a[i+127:i] := InvMixColumns(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + VAES +
immintrin.h
+ Cryptography +
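The four AES entries above chain naturally into a full cipher. A sketch under the assumption that the enc variants are exposed as _mm512_aesenc_epi128 / _mm512_aesenclast_epi128 (VAES; compile with -mavx512f -mvaes) and that the caller supplies already-expanded round keys:

#include <immintrin.h>

/* AES-128 over four independent 128-bit blocks at once. rk[0..10] hold the
   expanded round keys, each broadcast to all four 128-bit lanes; the key
   expansion itself is omitted here. */
static __m512i aes128_encrypt_x4(__m512i blocks, const __m512i rk[11]) {
    __m512i s = _mm512_xor_si512(blocks, rk[0]);   /* initial AddRoundKey */
    for (int r = 1; r < 10; r++)
        s = _mm512_aesenc_epi128(s, rk[r]);        /* ShiftRows, SubBytes, MixColumns, XOR */
    return _mm512_aesenclast_epi128(s, rk[10]);    /* the last round skips MixColumns */
}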
+ + + + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
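The entry above keeps only the low 32 bits of each 64-bit product, which matters once the product overflows. A sketch assuming the name _mm512_maskz_mullo_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(100000);
    __m512i b = _mm512_set1_epi32(100000);
    /* 100000 * 100000 = 10^10 overflows 32 bits; each enabled lane keeps
       the low 32 bits of the 64-bit intermediate, i.e. 10^10 mod 2^32 */
    __m512i r = _mm512_maskz_mullo_epi32(0x0003, a, b); /* lanes 0-1 only */
    int out[16];
    _mm512_storeu_si512(out, r);
    printf("%d %d %d\n", out[0], out[1], out[2]); /* 1410065408 1410065408 0 */
    return 0;
}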
+ + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
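A sketch of the packed zero-masked add entries above, assuming the 64-bit form is _mm512_maskz_add_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(1.5);
    __m512d b = _mm512_set1_pd(2.5);
    /* keep the four low lanes, zero the four high ones */
    __m512d r = _mm512_maskz_add_pd(0x0F, a, b);
    double out[8];
    _mm512_storeu_pd(out, r);
    for (int j = 0; j < 8; j++)
        printf("%g ", out[j]); /* 4 4 4 4 0 0 0 0 */
    printf("\n");
    return 0;
}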
+ + + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := a[63:0] + b[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] + b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] + b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] + b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] + b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := a[31:0] + b[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] + b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] + b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] + b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] + b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
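For the scalar entries above, the [round_note] variants take a rounding immediate, and the upper element always comes from "a" regardless of the mask. A sketch assuming the zeromask rounding form is _mm_maskz_add_round_sd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_pd(99.0, 1.25);  /* upper element, lower element */
    __m128d b = _mm_set_pd(-1.0, 2.25);
    /* k[0] = 1: the low element becomes a + b; the upper element is copied from "a" */
    __m128d r = _mm_maskz_add_round_sd(0x1, a, b,
                    _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    double out[2];
    _mm_storeu_pd(out, r);
    printf("%g %g\n", out[0], out[1]); /* 3.5 99 */
    return 0;
}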
+ + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + dst[i+63:i] := a[i+63:i] / b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".
	[round_note]

FOR j := 0 to 7
	i := 64*j
	dst[i+63:i] := a[i+63:i] / b[i+63:i]
ENDFOR
dst[MAX:512] := 0
+
+
+ AVX512F
+
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := 64*j + IF k[j] + dst[i+63:i] := a[i+63:i] / b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := a[i+31:i] / b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := a[i+31:i] / b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := a[i+31:i] / b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
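The [round_note] division entries above make the static rounding mode observable. A sketch assuming the rounding form is _mm512_div_round_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(1.0);
    __m512d b = _mm512_set1_pd(3.0);
    /* the same division performed under two static rounding modes */
    __m512d dn = _mm512_div_round_pd(a, b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    __m512d up = _mm512_div_round_pd(a, b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    double lo[8], hi[8];
    _mm512_storeu_pd(lo, dn);
    _mm512_storeu_pd(hi, up);
    /* prints the two adjacent doubles bracketing 1/3 */
    printf("%.17g\n%.17g\n", lo[0], hi[0]);
    return 0;
}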
+ + + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := a[63:0] / b[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] / b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] / b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] / b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] / b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := a[31:0] / b[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] / b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] / b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] / b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] / b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
	[round_note]

FOR j := 0 to 15
	i := j*32
	IF k[j]
		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
	ELSE
		dst[i+31:i] := 0
	FI
ENDFOR
dst[MAX:512] := 0
+
+
+
+
+ AVX512F
+
immintrin.h
+ Arithmetic +
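A sketch of the packed zero-masked fused multiply-add entries above, assuming the single-precision form is _mm512_maskz_fmadd_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 a = _mm512_set1_ps(2.0f);
    __m512 b = _mm512_set1_ps(3.0f);
    __m512 c = _mm512_set1_ps(1.0f);
    /* enabled lanes compute a*b + c in one fused step; disabled lanes are zeroed */
    __m512 r = _mm512_maskz_fmadd_ps(0x00FF, a, b, c);
    float out[16];
    _mm512_storeu_ps(out, r);
    printf("%g %g\n", out[0], out[15]); /* 7 0 */
    return 0;
}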
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
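The scalar FMA entries above differ mainly in which operand supplies the pass-through element ("a" or "c") when mask bit 0 is clear. A sketch of the merge-from-"a" behaviour, assuming that variant is _mm_mask_fmadd_sd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_sd(2.0);
    __m128d b = _mm_set_sd(3.0);
    __m128d c = _mm_set_sd(4.0);
    __m128d off = _mm_mask_fmadd_sd(a, 0x0, b, c); /* mask bit 0 clear: low element stays a */
    __m128d on  = _mm_mask_fmadd_sd(a, 0x1, b, c); /* low element = 2*3 + 4 */
    printf("%g %g\n", _mm_cvtsd_f64(off), _mm_cvtsd_f64(on)); /* 2 10 */
    return 0;
}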
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
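Per the pseudocode above, fmaddsub subtracts "c" in even lanes and adds it in odd lanes. A sketch assuming the unmasked 64-bit form is _mm512_fmaddsub_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(2.0);
    __m512d b = _mm512_set1_pd(3.0);
    __m512d c = _mm512_set1_pd(1.0);
    __m512d r = _mm512_fmaddsub_pd(a, b, c);
    double out[8];
    _mm512_storeu_pd(out, r);
    /* even lanes: 2*3 - 1 = 5, odd lanes: 2*3 + 1 = 7 */
    printf("%g %g\n", out[0], out[1]);
    return 0;
}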
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
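A short sketch of the zero-masking convention used by the fnmadd entries above, via `_mm512_maskz_fnmadd_ps` from `immintrin.h` (AVX512F assumed; the wrapper name is illustrative). The `maskz_` prefix consistently denotes this zeroing behaviour, in contrast to the merging `mask_`/`mask3_` forms.

```c
#include <immintrin.h>

/* Active lanes compute -(a*b) + c; lanes whose mask bit is clear are
 * zeroed rather than merged from another operand. */
__m512 fnmadd_ps_zeromask(__mmask16 k, __m512 a, __m512 b, __m512 c) {
    return _mm512_maskz_fnmadd_ps(k, a, b, c);
}
```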
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
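The `[round_note]` placeholder in the rounding variants stands for an explicit rounding-mode/SAE operand. A hedged sketch of how it is supplied in C for the entry above, using `_mm_fnmadd_round_sd` from `immintrin.h`; the rounding argument must be a compile-time constant, and the wrapper name is illustrative.

```c
#include <immintrin.h>

/* dst[63:0] = -(a[63:0] * b[63:0]) + c[63:0], rounded to nearest with
 * floating-point exceptions suppressed; dst[127:64] is copied from a. */
__m128d fnmadd_sd_round_nearest(__m128d a, __m128d b, __m128d c) {
    return _mm_fnmadd_round_sd(a, b, c,
                               _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
```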
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := c[63:0] +FI +dst[127:64] := c[127:64] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F 

immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F 

immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F 

immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". +IF k[0] + dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + AVX512F 

immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F 

immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := c[31:0] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F 

immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := a[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F 

immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
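A minimal sketch of the zero-masked packed multiply above (`_mm512_maskz_mul_pd` in `immintrin.h`; AVX512F assumed, helper names illustrative), together with the equivalent scalar loop.

```c
#include <immintrin.h>

__m512d mul_pd_zeromask(__mmask8 k, __m512d a, __m512d b) {
    return _mm512_maskz_mul_pd(k, a, b);   /* inactive lanes become 0.0 */
}

/* Equivalent scalar loop, one mask bit per 64-bit lane. */
static void mul_pd_zeromask_ref(unsigned char k, const double a[8],
                                const double b[8], double dst[8]) {
    for (int j = 0; j < 8; j++)
        dst[j] = ((k >> j) & 1) ? a[j] * b[j] : 0.0;
}
```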
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] * b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] * b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
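For contrast with the zeromask forms, a sketch of the merging writemask convention using the scalar entry above (`_mm_mask_mul_sd` from `immintrin.h`; wrapper name illustrative): when mask bit 0 is clear the low lane comes from `src`, and the upper lane always comes from `a`.

```c
#include <immintrin.h>

/* k bit 0 set:   dst = { a[0]*b[0], a[1] }
 * k bit 0 clear: dst = { src[0],    a[1] } */
__m128d mul_sd_merge(__m128d src, __mmask8 k, __m128d a, __m128d b) {
    return _mm_mask_mul_sd(src, k, a, b);
}
```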
+ + + + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] * b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] * b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := a[63:0] * b[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] * b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] * b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] * b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] * b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := a[31:0] * b[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
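The same masking scheme applies to the integer entries; a small usage sketch with `_mm512_maskz_add_epi32` (the helper name and mask choice are illustrative).

```c
#include <immintrin.h>

/* Adds 1 to the even-indexed 32-bit lanes of v and zeroes the odd lanes:
 * mask 0x5555 has bits 0, 2, 4, ... set. */
__m512i incr_even_lanes(__m512i v) {
    return _mm512_maskz_add_epi32((__mmask16)0x5555, v, _mm512_set1_epi32(1));
}
```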
+ + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
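The entry above is the widening multiply: only the low 32 bits of each 64-bit lane participate, and the full signed 64-bit product is stored. A sketch with `_mm512_mul_epi32` plus a scalar model (helper names illustrative).

```c
#include <immintrin.h>
#include <stdint.h>

/* Eight products, one per 64-bit lane, from the lanes' low 32-bit halves. */
__m512i widening_mul_s32(__m512i a, __m512i b) {
    return _mm512_mul_epi32(a, b);
}

/* Scalar model: element 2*j is the low half of 64-bit lane j. */
static void widening_mul_s32_ref(const int32_t a[16], const int32_t b[16],
                                 int64_t dst[8]) {
    for (int j = 0; j < 8; j++)
        dst[j] = (int64_t)a[2 * j] * (int64_t)b[2 * j];
}
```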
+ + + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+31:i] * b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] - b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] - b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := a[63:0] - b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := a[63:0] - b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := a[63:0] - b[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] - b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] - b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := a[31:0] - b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := a[31:0] - b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := a[31:0] - b[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Store 512-bits (composed of 8 packed 64-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 16 packed 32-bit integers) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 16-bit mask from "a" into memory. + +MEM[mem_addr+15:mem_addr] := a[15:0] + + + AVX512F +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
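A sketch of the compress-store above (`_mm512_mask_compressstoreu_pd` in `immintrin.h`; wrapper name illustrative): active lanes are packed contiguously at the destination, so the number of doubles written equals the popcount of the mask.

```c
#include <immintrin.h>

/* Writes popcount(k) doubles to out, in ascending lane order;
 * out may be unaligned. */
void compress_store_pd(double *out, __mmask8 k, __m512d a) {
    _mm512_mask_compressstoreu_pd(out, k, a);
}
```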
+ + Swizzle + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits of integer data from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
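A small sketch of the unaligned 512-bit store above via `_mm512_storeu_si512` (wrapper name illustrative); unlike the `stream`/aligned forms, it places no alignment requirement on the destination.

```c
#include <immintrin.h>

void store_512_unaligned(long long dst[8], __m512i v) {
    _mm512_storeu_si512(dst, v);   /* dst may have any alignment */
}
```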
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits of integer data from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
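By contrast, the non-temporal store above does require 64-byte alignment; a sketch using `_mm512_stream_si512`, with C11 `alignas` satisfying the requirement (buffer and wrapper names illustrative).

```c
#include <immintrin.h>
#include <stdalign.h>

static alignas(64) long long sink[8];   /* meets the 64-byte requirement */

void stream_store_512(__m512i v) {
    _mm512_stream_si512((__m512i *)sink, v);  /* non-temporal hint: bypasses caches */
    _mm_sfence();  /* order the NT store before subsequent stores */
}
```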
+ + + + + Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store the lower double-precision (64-bit) floating-point element from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + MEM[mem_addr+63:mem_addr] := a[63:0] +FI + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store the lower single-precision (32-bit) floating-point element from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + MEM[mem_addr+31:mem_addr] := a[31:0] +FI + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 32 +m := base_addr +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 64 +m := base_addr +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
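A sketch of the scatter entry above (`_mm512_i32scatter_epi64` in `immintrin.h`; wrapper name illustrative): eight 64-bit values from `a` go to `base_addr + vindex[j]*scale`, and `scale` must be a compile-time constant of 1, 2, 4 or 8.

```c
#include <immintrin.h>
#include <stdint.h>

/* Element j of a is stored at base[vindex[j]]: scale is the literal 8,
 * matching sizeof(int64_t), so indices count whole elements. */
void scatter_i64_by_i32(int64_t *base, __m256i vindex, __m512i a) {
    _mm512_i32scatter_epi64(base, vindex, a, 8);
}
```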
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F 

immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Multiplies elements in packed 64-bit integer vectors "a" and "b" together, storing the lower 64 bits of the result in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] * b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiplies elements in packed 64-bit integer vectors "a" and "b" together, storing the lower 64 bits of the result in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Load 512-bits (composed of 8 packed 64-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 16 packed 32-bit integers) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 16-bit mask from memory into "k". + +k[15:0] := MEM[mem_addr+15:mem_addr] + + + AVX512F +
immintrin.h
+ Load +
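+ For the 16-bit mask load above (presumably _load_mask16, which compiles to KMOVW):
+
+    #include <immintrin.h>
+    #include <stdio.h>
+
+    int main(void) {
+        __mmask16 stored = 0xA5A5;
+        __mmask16 k = _load_mask16(&stored);
+        printf("0x%04x\n", (unsigned)k); /* 0xa5a5 */
+        return 0;
+    }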
+ + Swizzle + + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
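+ The expand loads above read densely packed elements and spread them into the lanes selected by the mask. A sketch assuming the zero-masked entry is _mm512_maskz_expandloadu_pd:
+
+    #include <immintrin.h>
+    #include <stdio.h>
+
+    int main(void) {
+        double packed[4] = {1.0, 2.0, 3.0, 4.0};
+        __mmask8 k = 0xA5; /* lanes 0, 2, 5, 7 receive consecutive values */
+        __m512d v = _mm512_maskz_expandloadu_pd(k, packed);
+        double out[8];
+        _mm512_storeu_pd(out, v);
+        printf("%g %g %g\n", out[0], out[1], out[2]); /* 1 0 2 */
+        return 0;
+    }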
+ + Swizzle + + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
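+ A sketch of the masked 32-bit-index gather, assuming it is _mm512_mask_i32gather_pd (note that in the AVX512 gathers "vindex" precedes "base_addr" in the argument list):
+
+    #include <immintrin.h>
+    #include <stdio.h>
+
+    int main(void) {
+        double table[32];
+        for (int i = 0; i < 32; i++) table[i] = (double)i;
+        __m256i vindex = _mm256_setr_epi32(0, 4, 8, 12, 16, 20, 24, 28);
+        __m512d src = _mm512_set1_pd(-1.0);
+        __mmask8 k = 0x0F; /* gather only the low four lanes */
+        __m512d g = _mm512_mask_i32gather_pd(src, k, vindex, table, 8);
+        double out[8];
+        _mm512_storeu_pd(out, g);
+        printf("%g %g\n", out[0], out[7]); /* 0 -1 */
+        return 0;
+    }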
+ + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
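+ The aligned zero-masked loads above require 64-byte alignment, unlike the loadu forms. A sketch assuming the double-precision entry is _mm512_maskz_load_pd:
+
+    #include <immintrin.h>
+
+    int main(void) {
+        _Alignas(64) double buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+        /* a misaligned mem_addr may raise #GP here */
+        __m512d v = _mm512_maskz_load_pd(0x0F, buf); /* upper four lanes zeroed */
+        (void)v;
+        return 0;
+    }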
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits of integer data from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
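+ The masked unaligned integer loads need no particular alignment; a sketch assuming the 64-bit write-masked entry is _mm512_mask_loadu_epi64:
+
+    #include <immintrin.h>
+
+    int main(void) {
+        long long data[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+        __m512i src = _mm512_set1_epi64(-1);
+        /* lanes whose mask bit is clear keep src's value */
+        __m512i v = _mm512_mask_loadu_epi64(src, 0x3C, data);
+        (void)v;
+        return 0;
+    }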
+ + + + Load 512-bits of integer data from memory into "dst" using a non-temporal memory hint. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
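+ The non-temporal load above (presumably _mm512_stream_load_si512, i.e. VMOVNTDQA) hints that the data should bypass the caches; the 64-byte alignment requirement still applies:
+
+    #include <immintrin.h>
+
+    int main(void) {
+        _Alignas(64) long long data[8] = {0};
+        __m512i v = _mm512_stream_load_si512(data);
+        (void)v;
+        return 0;
+    }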
+ + + + + + Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and set the upper element of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + dst[63:0] := MEM[mem_addr+63:mem_addr] +ELSE + dst[63:0] := src[63:0] +FI +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and set the upper element of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + dst[63:0] := MEM[mem_addr+63:mem_addr] +ELSE + dst[63:0] := 0 +FI +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and set the upper elements of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + dst[31:0] := MEM[mem_addr+31:mem_addr] +ELSE + dst[31:0] := src[31:0] +FI +dst[MAX:32] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and set the upper elements of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +IF k[0] + dst[31:0] := MEM[mem_addr+31:mem_addr] +ELSE + dst[31:0] := 0 +FI +dst[MAX:32] := 0 + + + AVX512F +
immintrin.h
+ Load +
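+ The masked scalar loads above keep or zero lane 0 depending on mask bit 0. A sketch assuming the double-precision pair is _mm_mask_load_sd / _mm_maskz_load_sd; the entries state a 16-byte alignment requirement, so the operand is over-aligned accordingly:
+
+    #include <immintrin.h>
+
+    int main(void) {
+        _Alignas(16) double x = 42.0;
+        __m128d src = _mm_set_sd(-1.0);
+        __m128d lo = _mm_mask_load_sd(src, 0x1, &x); /* lane 0 = 42.0 */
+        __m128d z  = _mm_maskz_load_sd(0x0, &x);     /* lane 0 zeroed */
+        (void)lo; (void)z;
+        return 0;
+    }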
+ + + + Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary. +
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
immintrin.h
+ Load +
+ + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary. +
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 7 + i := j*64 + m := j*64 + IF k[j] + addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + Compute the bitwise AND of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] AND b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise NOT of 16-bit masks "a" and then AND with "b", and store the result in "k". + +k[15:0] := (NOT a[15:0]) AND b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Compute the bitwise NOT of 16-bit mask "a", and store the result in "k". + +k[15:0] := NOT a[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] OR b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XNOR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := NOT (a[15:0] XOR b[15:0]) +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XOR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] XOR b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
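+ The 16-bit mask logic entries above map naturally onto the _mm512_k* helpers (assumed names; kand/kandn/knot/kor/kxnor/kxor):
+
+    #include <immintrin.h>
+    #include <stdio.h>
+
+    int main(void) {
+        __mmask16 a = 0x00FF, b = 0x0F0F;
+        __mmask16 k_and = _mm512_kand(a, b); /* 0x000f */
+        __mmask16 k_or  = _mm512_kor(a, b);  /* 0x0fff */
+        __mmask16 k_not = _mm512_knot(a);    /* 0xff00 */
+        printf("0x%04x 0x%04x 0x%04x\n",
+               (unsigned)k_and, (unsigned)k_or, (unsigned)k_not);
+        return 0;
+    }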
+ + + + + Shift the bits of 16-bit mask "a" left by "count" while shifting in zeros, and store the least significant 16 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 15 + k[15:0] := a[15:0] << count[7:0] +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Shift the bits of 16-bit mask "a" right by "count" while shifting in zeros, and store the least significant 16 bits of the result in "k". + +k[MAX:0] := 0 +IF count[7:0] <= 15 + k[15:0] := a[15:0] >> count[7:0] +FI + + + AVX512F +
immintrin.h
+ Mask +
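+ For the mask shifts, assuming they correspond to _kshiftli_mask16 / _kshiftri_mask16 (the count is an immediate):
+
+    #include <immintrin.h>
+
+    int main(void) {
+        __mmask16 k = 0x0001;
+        __mmask16 l = _kshiftli_mask16(k, 4); /* 0x0010 */
+        __mmask16 r = _kshiftri_mask16(l, 2); /* 0x0004 */
+        (void)r;
+        return 0;
+    }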
+ + + + + + Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". + +tmp[15:0] := a[15:0] OR b[15:0] +IF tmp[15:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI +IF tmp[15:0] == 0xFFFF + MEM[all_ones+7:all_ones] := 1 +ELSE + MEM[all_ones+7:all_ones] := 0 +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". + +tmp[15:0] := a[15:0] OR b[15:0] +IF tmp[15:0] == 0x0 + dst := 1 +ELSE + dst := 0 +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". + +tmp[15:0] := a[15:0] OR b[15:0] +IF tmp[15:0] == 0xFFFF + dst := 1 +ELSE + dst := 0 +FI + + + AVX512F +
immintrin.h
+ Mask +
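+ The kortest entries set ZF/CF from the OR of two masks; the C-level helpers return those flags as an int. A sketch assuming _mm512_kortestz and _mm512_kortestc:
+
+    #include <immintrin.h>
+    #include <stdio.h>
+
+    int main(void) {
+        __mmask16 none = 0x0000, all = 0xFFFF;
+        if (_mm512_kortestz(none, none)) /* OR of the masks is all zeros */
+            printf("no lanes set\n");
+        if (_mm512_kortestc(all, none))  /* OR of the masks is all ones */
+            printf("all lanes set\n");
+        return 0;
+    }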
+ + + + Convert 16-bit mask "a" into an integer value, and store the result in "dst". + +dst := ZeroExtend32(a[15:0]) + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Convert integer value "a" into an 16-bit mask, and store the result in "k". + +k := ZeroExtend16(a[15:0]) + + + AVX512F +
immintrin.h
+ Mask +
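+ The mask/integer conversions above round-trip through a plain unsigned value; a sketch assuming _cvtu32_mask16 and _cvtmask16_u32:
+
+    #include <immintrin.h>
+    #include <stdio.h>
+
+    int main(void) {
+        __mmask16 k = _cvtu32_mask16(0xBEEF); /* integer -> 16-bit mask */
+        unsigned int u = _cvtmask16_u32(k);   /* 16-bit mask -> integer */
+        printf("0x%04x\n", u); /* 0xbeef */
+        return 0;
+    }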
+ + + + + Compute the bitwise NOT of 16-bit masks "a" and then AND with "b", and store the result in "k". + +k[15:0] := (NOT a[15:0]) AND b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise AND of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] AND b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Copy 16-bit mask "a" to "k". + +k[15:0] := a[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Compute the bitwise NOT of 16-bit mask "a", and store the result in "k". + +k[15:0] := NOT a[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise OR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] OR b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Unpack and interleave 8 bits from masks "a" and "b", and store the 16-bit result in "k". + +k[7:0] := b[7:0] +k[15:8] := a[7:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XNOR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := NOT (a[15:0] XOR b[15:0]) +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Compute the bitwise XOR of 16-bit masks "a" and "b", and store the result in "k". + +k[15:0] := a[15:0] XOR b[15:0] +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Performs bitwise OR between "k1" and "k2", storing the result in "dst". ZF flag is set if "dst" is 0. + dst[15:0] := k1[15:0] | k2[15:0] +IF dst == 0 + SetZF() +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + Performs bitwise OR between "k1" and "k2", storing the result in "dst". CF flag is set if "dst" consists of all 1's. + dst[15:0] := k1[15:0] | k2[15:0] +IF PopCount(dst[15:0]) == 16 + SetCF() +FI + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Converts bit mask "k1" into an integer value, storing the results in "dst". + +dst := ZeroExtend32(k1) + + + AVX512F +
immintrin.h
+ Mask +
+ + + + Converts integer "mask" into bitmask, storing the result in "dst". + +dst := mask[15:0] + + + AVX512F +
immintrin.h
+ Mask +
+ + + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 32-bit elements, and stores the low 64 bytes (16 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (32*imm8[3:0]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 64 bytes (8 elements) in "dst". + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (64*imm8[2:0]) +dst[511:0] := temp[511:0] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
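+ The 64-bit alignr concatenates two registers and shifts whole lanes across them; assuming the unmasked entry is _mm512_alignr_epi64:
+
+    #include <immintrin.h>
+    #include <stdio.h>
+
+    int main(void) {
+        __m512i a = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+        __m512i b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        /* result lanes: b[3..7] followed by a[0..2] */
+        __m512i r = _mm512_alignr_epi64(a, b, 3);
+        long long out[8];
+        _mm512_storeu_si512(out, r);
+        printf("%lld %lld\n", out[0], out[7]); /* 3 10 */
+        return 0;
+    }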
+ + + + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 64 bytes (8 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (64*imm8[2:0]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 64-bit elements, and stores the low 64 bytes (8 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (64*imm8[2:0]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := temp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
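+ FIXUPIMM classifies each element of "b" into one of the eight tokens and uses the matching nibble of "c" as a response selector. A sketch assuming the unmasked entry above is _mm512_fixupimm_pd; nibble 2 (ZERO_VALUE_TOKEN) is set to response 10 ("+1"), every other nibble stays 0 ("pass the element of a through"):
+
+    #include <immintrin.h>
+    #include <stdio.h>
+
+    int main(void) {
+        __m512i c = _mm512_set1_epi64(0xA00); /* response table; low 32 bits used */
+        __m512d x = _mm512_setr_pd(0.0, 2.5, -0.0, 7.0, 0.0, 1.0, 0.0, 3.0);
+        __m512d r = _mm512_fixupimm_pd(x, x, c, 0); /* zeros become 1.0 */
+        double out[8];
+        _mm512_storeu_pd(out, r);
+        printf("%g %g %g\n", out[0], out[1], out[2]); /* 1 2.5 1 */
+        return 0;
+    }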
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) { + tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0] + CASE(tsrc[31:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[31:0] := src1[31:0] + 1 : dest[31:0] := tsrc[31:0] + 2 : dest[31:0] := QNaN(tsrc[31:0]) + 3 : dest[31:0] := QNAN_Indefinite + 4 : dest[31:0] := -INF + 5 : dest[31:0] := +INF + 6 : dest[31:0] := tsrc.sign? -INF : +INF + 7 : dest[31:0] := -0 + 8 : dest[31:0] := +0 + 9 : dest[31:0] := -1 + 10: dest[31:0] := +1 + 11: dest[31:0] := 1/2 + 12: dest[31:0] := 90.0 + 13: dest[31:0] := PI/2 + 14: dest[31:0] := MAX_FLOAT + 15: dest[31:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst", and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0]) +dst[127:64] := b[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst", and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0]) +dst[127:64] := b[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. + [sae_note] + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +IF k[0] + dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0]) +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := b[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. + enum TOKEN_TYPE { + QNAN_TOKEN := 0, \ + SNAN_TOKEN := 1, \ + ZERO_VALUE_TOKEN := 2, \ + ONE_VALUE_TOKEN := 3, \ + NEG_INF_TOKEN := 4, \ + POS_INF_TOKEN := 5, \ + NEG_VALUE_TOKEN := 6, \ + POS_VALUE_TOKEN := 7 +} +DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { + tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] + CASE(tsrc[63:0]) OF + QNAN_TOKEN:j := 0 + SNAN_TOKEN:j := 1 + ZERO_VALUE_TOKEN: j := 2 + ONE_VALUE_TOKEN: j := 3 + NEG_INF_TOKEN: j := 4 + POS_INF_TOKEN: j := 5 + NEG_VALUE_TOKEN: j := 6 + POS_VALUE_TOKEN: j := 7 + ESAC + + token_response[3:0] := src3[3+4*j:4*j] + + CASE(token_response[3:0]) OF + 0 : dest[63:0] := src1[63:0] + 1 : dest[63:0] := tsrc[63:0] + 2 : dest[63:0] := QNaN(tsrc[63:0]) + 3 : dest[63:0] := QNAN_Indefinite + 4 : dest[63:0] := -INF + 5 : dest[63:0] := +INF + 6 : dest[63:0] := tsrc.sign? -INF : +INF + 7 : dest[63:0] := -0 + 8 : dest[63:0] := +0 + 9 : dest[63:0] := -1 + 10: dest[63:0] := +1 + 11: dest[63:0] := 1/2 + 12: dest[63:0] := 90.0 + 13: dest[63:0] := PI/2 + 14: dest[63:0] := MAX_FLOAT + 15: dest[63:0] := -MAX_FLOAT + ESAC + + CASE(tsrc[31:0]) OF + ZERO_VALUE_TOKEN: + IF (imm8[0]) #ZE; FI + ZERO_VALUE_TOKEN: + IF (imm8[1]) #IE; FI + ONE_VALUE_TOKEN: + IF (imm8[2]) #ZE; FI + ONE_VALUE_TOKEN: + IF (imm8[3]) #IE; FI + SNAN_TOKEN: + IF (imm8[4]) #IE; FI + NEG_INF_TOKEN: + IF (imm8[5]) #IE; FI + NEG_VALUE_TOKEN: + IF (imm8[6]) #IE; FI + POS_INF_TOKEN: + IF (imm8[7]) #IE; FI + ESAC + RETURN dest[63:0] +} +IF k[0] + dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0]) +ELSE + dst[63:0] := a[63:0] +FI +dst[127:64] := b[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting.
+ [sae_note]
+ enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN: j := 0
+	SNAN_TOKEN: j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+	IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+	IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+	IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+	IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+	IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+	IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+IF k[0]
+	dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := b[127:64]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN: j := 0
+	SNAN_TOKEN: j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+	IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+	IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+	IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+	IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+	IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+	IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+IF k[0]
+	dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := b[127:64]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
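Worked example. The FIXUPIMMPD token table above is easiest to read through a concrete call. The sketch below is illustrative only: it assumes the unmasked immintrin.h entry point `_mm_fixupimm_sd` (the unmasked form appears elsewhere in this listing) and an AVX512F toolchain (e.g. `gcc -mavx512f`). It routes a QNaN input to +0.0 by placing response 8 in nibble 0 (the QNAN_TOKEN slot) of "c".

```c
#include <immintrin.h>
#include <math.h>
#include <stdio.h>

/* Sketch only: maps a QNaN in the low lane of b to +0.0.
   Nibble 0 of c handles QNAN_TOKEN (j = 0); response 8 selects +0. */
int main(void) {
    __m128d a = _mm_set_sd(1.0);              /* response 0 would keep this value */
    __m128d b = _mm_set_sd(NAN);              /* the value being classified       */
    __m128i c = _mm_set_epi64x(0, 0x8);       /* QNAN_TOKEN -> +0, others -> src1 */
    __m128d r = _mm_fixupimm_sd(a, b, c, 0);  /* imm8 = 0: no #ZE/#IE reporting   */
    printf("%f\n", _mm_cvtsd_f64(r));         /* prints 0.000000 */
    return 0;
}
```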
+ + + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst", and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting.
+ [sae_note]
+ enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN: j := 0
+	SNAN_TOKEN: j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+	IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+	IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+	IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+	IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+	IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+	IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+dst[31:0] := FIXUPIMMPS(a[31:0], b[31:0], c[31:0], imm8[7:0])
+dst[127:32] := b[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst", and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN: j := 0
+	SNAN_TOKEN: j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+	IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+	IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+	IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+	IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+	IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+	IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+dst[31:0] := FIXUPIMMPS(a[31:0], b[31:0], c[31:0], imm8[7:0])
+dst[127:32] := b[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting.
+ [sae_note]
+ enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN: j := 0
+	SNAN_TOKEN: j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+	IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+	IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+	IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+	IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+	IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+	IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+IF k[0]
+	dst[31:0] := FIXUPIMMPS(a[31:0], b[31:0], c[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := b[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN: j := 0
+	SNAN_TOKEN: j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+	IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+	IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+	IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+	IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+	IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+	IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+IF k[0]
+	dst[31:0] := FIXUPIMMPS(a[31:0], b[31:0], c[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := b[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting.
+ [sae_note]
+ enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN: j := 0
+	SNAN_TOKEN: j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+	IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+	IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+	IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+	IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+	IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+	IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+IF k[0]
+	dst[31:0] := FIXUPIMMPS(a[31:0], b[31:0], c[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := b[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting.
+ enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN: j := 0
+	SNAN_TOKEN: j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+	IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+	IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+	IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+	IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+	IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+	IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+	IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+IF k[0]
+	dst[31:0] := FIXUPIMMPS(a[31:0], b[31:0], c[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := b[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
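A similar hedged sketch for the zero-masked single-precision form, assuming the matching immintrin.h entry point is `_mm_maskz_fixupimm_ss` with the mask as the first argument: nibble 4 of "c" handles NEG_INF_TOKEN, and response 14 selects MAX_FLOAT.

```c
#include <immintrin.h>
#include <math.h>
#include <stdio.h>

/* Sketch: clamp -INF in the low lane to the largest finite float. */
int main(void) {
    __m128 a = _mm_set_ss(0.0f);                     /* response 0 fallback */
    __m128 b = _mm_set_ss(-INFINITY);                /* value being classified */
    __m128i c = _mm_set_epi32(0, 0, 0, 14 << 16);    /* NEG_INF_TOKEN -> MAX_FLOAT */
    __m128 r = _mm_maskz_fixupimm_ss(1, a, b, c, 0); /* mask bit 0 set */
    printf("%g\n", _mm_cvtss_f32(r));                /* prints 3.40282e+38 */
    return 0;
}
```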
+ + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
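To make the floor(log2(x)) behaviour concrete, a minimal sketch (assuming `_mm512_maskz_getexp_pd` is the matching immintrin.h entry point; compile with -mavx512f):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(10.0);
    /* All eight mask bits set: every lane gets ConvertExpFP64(10.0) = 3.0 */
    __m512d e = _mm512_maskz_getexp_pd(0xFF, a);
    double out[8];
    _mm512_storeu_pd(out, e);
    printf("%f\n", out[0]);  /* prints 3.000000 */
    return 0;
}
```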
+ + + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + dst[63:0] := ConvertExpFP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + dst[63:0] := ConvertExpFP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + IF k[0] + dst[63:0] := ConvertExpFP64(b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst[63:0] := ConvertExpFP64(b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + IF k[0] + dst[63:0] := ConvertExpFP64(b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst[63:0] := ConvertExpFP64(b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + dst[31:0] := ConvertExpFP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + dst[31:0] := ConvertExpFP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + IF k[0] + dst[31:0] := ConvertExpFP32(b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst[31:0] := ConvertExpFP32(b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + [sae_note] + IF k[0] + dst[31:0] := ConvertExpFP32(b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst[31:0] := ConvertExpFP32(b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
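For the scalar forms, a short sketch assuming the unmasked `_mm_getexp_ss` entry point described above (exponent taken from "b", upper lanes from "a"):

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set1_ps(7.0f);      /* supplies the three upper lanes */
    __m128 b = _mm_set_ss(8.0f);       /* exponent extracted from this lane */
    __m128 r = _mm_getexp_ss(a, b);    /* low lane: floor(log2(8)) = 3.0 */
    printf("%f\n", _mm_cvtss_f32(r));  /* prints 3.000000 */
    return 0;
}
```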
+ + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
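A hedged usage sketch for the packed form, assuming the entry point `_mm512_maskz_getmant_ps` and the usual `_MM_MANT_NORM_*`/`_MM_MANT_SIGN_*` enums for "interv" and "sc":

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 a = _mm512_set1_ps(10.0f);
    /* interv = [1, 2), sign taken from the source: 10.0 = 1.25 * 2^3 -> 1.25 */
    __m512 m = _mm512_maskz_getmant_ps(0xFFFF, a,
                                       _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
    float out[16];
    _mm512_storeu_ps(out, m);
    printf("%f\n", out[0]);  /* prints 1.250000 */
    return 0;
}
```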
+ + + + + + + + Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note][sae_note]
+dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note]
+dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + + + Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note][sae_note]
+IF k[0]
+	dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note]
+IF k[0]
+	dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note][sae_note]
+IF k[0]
+	dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note]
+IF k[0]
+	dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note][sae_note]
+dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note]
+dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + + + Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note][sae_note]
+IF k[0]
+	dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note]
+IF k[0]
+	dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note][sae_note]
+IF k[0]
+	dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+ [getmant_note]
+IF k[0]
+	dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
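And the scalar double-precision form, assuming the unmasked `_mm_getmant_sd` entry point:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set1_pd(0.0);      /* supplies the upper lane */
    __m128d b = _mm_set_sd(24.0);      /* 24.0 = 1.5 * 2^4 */
    __m128d r = _mm_getmant_sd(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
    printf("%f\n", _mm_cvtsd_f64(r));  /* prints 1.500000 */
    return 0;
}
```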
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src >> count) OR (src << (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
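A small sketch of the variable right-rotate, assuming the matching immintrin.h name is `_mm512_maskz_rorv_epi32`:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32((int)0x80000001);
    __m512i n = _mm512_set1_epi32(1);
    /* Each lane: rotate right by 1 -> 0xC0000000; mask 0xFFFF keeps all lanes */
    __m512i r = _mm512_maskz_rorv_epi32(0xFFFF, a, n);
    unsigned out[16];
    _mm512_storeu_si512(out, r);
    printf("%08x\n", out[0]);  /* prints c0000000 */
    return 0;
}
```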
+ + + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
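The imm8 split above (upper nibble = number of fraction bits M, lower nibble = rounding control) is clearer with numbers: with M = 2 and round-to-nearest, 1.3 becomes round(1.3 * 2^2) / 2^2 = 1.25. A sketch assuming the entry point `_mm512_roundscale_pd`:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(1.3);
    /* imm8[7:4] = 2 fraction bits, imm8[3:0] = 0 (round to nearest) */
    __m512d r = _mm512_roundscale_pd(a, (2 << 4) | 0);
    double out[8];
    _mm512_storeu_pd(out, r);
    printf("%f\n", out[0]);  /* prints 1.250000 */
    return 0;
}
```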
+ + + + + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +IF k[0] + dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note] + +DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) { + m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) + IF IsInf(tmp[63:0]) + tmp[63:0] := src1[63:0] + FI + RETURN tmp[63:0] +} +dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +IF k[0] + dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) { + m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) + IF IsInf(tmp[31:0]) + tmp[31:0] := src1[31:0] + FI + RETURN tmp[31:0] +} +dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
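A scalar sketch, assuming the entry point `_mm_roundscale_ss` and that, per the elided [round_imm_note], imm8[3:0] = 3 selects truncation:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set1_ps(9.0f);      /* upper three lanes of the result */
    __m128 b = _mm_set_ss(2.7f);
    /* imm8 = 3: zero fraction bits, rounding mode "truncate" */
    __m128 r = _mm_roundscale_ss(a, b, 3);
    printf("%f\n", _mm_cvtss_f32(r));  /* prints 2.000000 */
    return 0;
}
```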
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+ [round_note]
+DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512F
+&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
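All six packed double-precision scalef entries above run each element pair through the same SCALE helper, i.e. dst = a * 2^FLOOR(b) once the NaN and denormal special cases are out of the way. A short sketch of the unmasked form, assuming it maps to `_mm512_scalef_pd` in Rust's `std::arch::x86_64` (example values are arbitrary):

```
#[cfg(target_arch = "x86_64")]
fn scalef_pd_demo() {
    use std::arch::x86_64::*;
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return; // AVX-512F not available on this CPU
    }
    unsafe {
        let a = _mm512_set1_pd(1.5);
        let b = _mm512_set1_pd(3.7); // FLOOR(3.7) = 3, so each element is scaled by 2^3
        let r = _mm512_scalef_pd(a, b);
        let mut out = [0.0f64; 8];
        _mm512_storeu_pd(out.as_mut_ptr(), r);
        assert!(out.iter().all(|&x| x == 12.0)); // 1.5 * 2^3
    }
}
```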
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +IF k[0] + dst[63:0] := SCALE(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +IF k[0] + dst[63:0] := SCALE(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +IF k[0] + dst[63:0] := SCALE(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +IF k[0] + dst[63:0] := SCALE(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +dst[63:0] := SCALE(a[63:0], b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0])) + RETURN dst[63:0] +} +dst[63:0] := SCALE(a[63:0], b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +IF k[0] + dst[31:0] := SCALE(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +IF k[0] + dst[31:0] := SCALE(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +IF k[0] + dst[31:0] := SCALE(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +IF k[0] + dst[31:0] := SCALE(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +dst[31:0] := SCALE(a[31:0], b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + DEFINE SCALE(src1, src2) { + IF (src2 == NaN) + IF (src2 == SNaN) + RETURN QNAN(src2) + FI + ELSE IF (src1 == NaN) + IF (src1 == SNaN) + RETURN QNAN(src1) + FI + IF (src2 != INF) + RETURN QNAN(src1) + FI + ELSE + tmp_src2 := src2 + tmp_src1 := src1 + IF (IS_DENORMAL(src2) AND MXCSR.DAZ) + tmp_src2 := 0 + FI + IF (IS_DENORMAL(src1) AND MXCSR.DAZ) + tmp_src1 := 0 + FI + FI + dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0])) + RETURN dst[31:0] +} +dst[31:0] := SCALE(a[31:0], b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F + 
immintrin.h
+ Miscellaneous +
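The scalar variants apply SCALE only to the low lane and pass lanes 1 through 3 through from "a". A sketch under the assumption that the plain single-precision entry corresponds to `_mm_scalef_ss` (arbitrary example inputs):

```
#[cfg(target_arch = "x86_64")]
fn scalef_ss_demo() {
    use std::arch::x86_64::*;
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return; // AVX-512F not available on this CPU
    }
    unsafe {
        let a = _mm_set_ps(4.0, 3.0, 2.0, 1.5);
        let b = _mm_set_ss(2.9); // FLOOR(2.9) = 2, so the low lane is scaled by 2^2
        let r = _mm_scalef_ss(a, b);
        assert_eq!(_mm_cvtss_f32(r), 6.0); // 1.5 * 4; lanes 1..3 are copied from `a`
    }
}
```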
+ + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
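The broadcast above simply tiles a 128-bit group four times, as the `n := (j % 4)*32` indexing shows. A quick sketch, assuming this entry is exposed as `_mm512_broadcast_f32x4`:

```
#[cfg(target_arch = "x86_64")]
fn broadcast_f32x4_demo() {
    use std::arch::x86_64::*;
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return; // AVX-512F not available on this CPU
    }
    unsafe {
        let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes: [1, 2, 3, 4]
        let r = _mm512_broadcast_f32x4(a);
        let mut out = [0.0f32; 16];
        _mm512_storeu_ps(out.as_mut_ptr(), r);
        // the 128-bit group repeats four times across the 512-bit result
        assert_eq!(&out[..4], &[1.0, 2.0, 3.0, 4.0]);
        assert_eq!(&out[..4], &out[12..]);
    }
}
```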
+ + + + Broadcast the 4 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the 4 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the 4 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + dst[i+31:i] := a[n+31:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + n := (j % 4)*32 + IF k[j] + dst[i+31:i] := a[n+31:n] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the 4 packed 64-bit integers from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + dst[i+63:i] := a[n+63:n] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the 4 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the 4 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + n := (j % 4)*64 + IF k[j] + dst[i+63:i] := a[n+63:n] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
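Broadcasting the low element is the degenerate case of the same pattern: every destination lane reads `a[31:0]` (or `a[63:0]` in the double-precision form). A sketch, assuming the single-precision entry is `_mm512_broadcastss_ps`:

```
#[cfg(target_arch = "x86_64")]
fn broadcastss_demo() {
    use std::arch::x86_64::*;
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return; // AVX-512F not available on this CPU
    }
    unsafe {
        let a = _mm_set_ss(7.0); // only the low lane of `a` matters
        let r = _mm512_broadcastss_ps(a);
        let mut out = [0.0f32; 16];
        _mm512_storeu_ps(out.as_mut_ptr(), r);
        assert!(out.iter().all(|&x| x == 7.0));
    }
}
```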
+ + + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
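Compression walks the mask from bit 0 upward and packs each active element into the next free slot at the bottom of "dst", so the relative order of the survivors is preserved. A sketch of the zeroing form, assuming it maps to `_mm512_maskz_compress_ps`:

```
#[cfg(target_arch = "x86_64")]
fn compress_demo() {
    use std::arch::x86_64::*;
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return; // AVX-512F not available on this CPU
    }
    unsafe {
        let a = _mm512_setr_ps(
            0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
            8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
        );
        let k: __mmask16 = 0b1010_1010_1010_1010; // keep the odd-indexed lanes
        let r = _mm512_maskz_compress_ps(k, a);
        let mut out = [0.0f32; 16];
        _mm512_storeu_ps(out.as_mut_ptr(), r);
        // active elements are packed contiguously at the bottom, the rest is zeroed
        assert_eq!(&out[..8], &[1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0]);
        assert!(out[8..].iter().all(|&x| x == 0.0));
    }
}
```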
+ + + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
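Expand is the inverse walk: consecutive low elements of "a" are dealt out, in order, to the positions whose mask bits are set. A sketch, assuming the zeroing single-precision entry is `_mm512_maskz_expand_ps`:

```
#[cfg(target_arch = "x86_64")]
fn expand_demo() {
    use std::arch::x86_64::*;
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return; // AVX-512F not available on this CPU
    }
    unsafe {
        let a = _mm512_setr_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
            9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let k: __mmask16 = 0b0000_0000_0000_0101; // scatter into lanes 0 and 2
        let r = _mm512_maskz_expand_ps(k, a);
        let mut out = [0.0f32; 16];
        _mm512_storeu_ps(out.as_mut_ptr(), r);
        // the low elements of `a` are consumed in order, one per set mask bit
        assert_eq!(out[0], 1.0);
        assert_eq!(out[2], 2.0);
        assert!(out[1] == 0.0 && out[3..].iter().all(|&x| x == 0.0));
    }
}
```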
+ + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[1:0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +2: dst[127:0] := a[383:256] +3: dst[127:0] := a[511:384] +ESAC +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
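Per the CASE table above, imm8 just selects which 128-bit quarter of the source is copied out. A sketch, assuming the unmasked entry corresponds to `_mm512_extractf32x4_ps`:

```
#[cfg(target_arch = "x86_64")]
fn extract_demo() {
    use std::arch::x86_64::*;
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return; // AVX-512F not available on this CPU
    }
    unsafe {
        let a = _mm512_setr_ps(
            0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
            8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
        );
        // imm8 = 2 selects bits 383:256, i.e. lanes 8..11
        let r = _mm512_extractf32x4_ps::<2>(a);
        let mut out = [0.0f32; 4];
        _mm_storeu_ps(out.as_mut_ptr(), r);
        assert_eq!(out, [8.0, 9.0, 10.0, 11.0]);
    }
}
```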
+ + + + + Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[255:0] := a[255:0] +1: dst[255:0] := a[511:256] +ESAC +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[1:0] OF +0: dst[127:0] := a[127:0] +1: dst[127:0] := a[255:128] +2: dst[127:0] := a[383:256] +3: dst[127:0] := a[511:384] +ESAC +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[1:0] OF +0: tmp[127:0] := a[127:0] +1: tmp[127:0] := a[255:128] +2: tmp[127:0] := a[383:256] +3: tmp[127:0] := a[511:384] +ESAC +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Extract 256 bits (composed of 4 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst". + +CASE imm8[0] OF +0: dst[255:0] := a[255:0] +1: dst[255:0] := a[511:256] +ESAC +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Extract 256 bits (composed of 4 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Extract 256 bits (composed of 4 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +CASE imm8[0] OF +0: tmp[255:0] := a[255:0] +1: tmp[255:0] := a[511:256] +ESAC +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +2: dst[383:256] := b[127:0] +3: dst[511:384] := b[127:0] +ESAC +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
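Insertion is the mirror image: the whole 512-bit value is copied and one 128-bit quarter, chosen by imm8, is overwritten by "b". A sketch, assuming the unmasked entry is `_mm512_insertf32x4`:

```
#[cfg(target_arch = "x86_64")]
fn insert_demo() {
    use std::arch::x86_64::*;
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return; // AVX-512F not available on this CPU
    }
    unsafe {
        let a = _mm512_setzero_ps();
        let b = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes: [1, 2, 3, 4]
        // imm8 = 1 overwrites bits 255:128, i.e. lanes 4..7
        let r = _mm512_insertf32x4::<1>(a, b);
        let mut out = [0.0f32; 16];
        _mm512_storeu_ps(out.as_mut_ptr(), r);
        assert_eq!(&out[4..8], &[1.0, 2.0, 3.0, 4.0]);
        assert!(out[..4].iter().all(|&x| x == 0.0));
    }
}
```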
+ + + + + + Copy "a" to "dst", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE (imm8[0]) OF +0: dst[255:0] := b[255:0] +1: dst[511:256] := b[255:0] +ESAC +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: dst[127:0] := b[127:0] +1: dst[255:128] := b[127:0] +2: dst[383:256] := b[127:0] +3: dst[511:384] := b[127:0] +ESAC +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[1:0]) OF +0: tmp[127:0] := b[127:0] +1: tmp[255:128] := b[127:0] +2: tmp[383:256] := b[127:0] +3: tmp[511:384] := b[127:0] +ESAC +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8". + +dst[511:0] := a[511:0] +CASE (imm8[0]) OF +0: dst[255:0] := b[255:0] +1: dst[511:256] := b[255:0] +ESAC +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Copy "a" to "tmp", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[511:0] := a[511:0] +CASE (imm8[0]) OF +0: tmp[255:0] := b[255:0] +1: tmp[511:256] := b[255:0] +ESAC +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 32 +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 32 +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[m+size-1:m] := a[i+31:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 64 +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 64 +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[m+size-1:m] := a[i+63:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
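Because each destination lane reads "a" at the index `idx[i+3:i]`, an index vector of 15 down to 0 reverses the register. A sketch, assuming the unmasked entry corresponds to `_mm512_permutexvar_epi32`:

```
#[cfg(target_arch = "x86_64")]
fn permutexvar_demo() {
    use std::arch::x86_64::*;
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return; // AVX-512F not available on this CPU
    }
    unsafe {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // each index picks a source lane; this index pattern reverses the vector
        let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_permutexvar_epi32(idx, a);
        let mut out = [0i32; 16];
        _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
        assert_eq!(out[0], 15);
        assert_eq!(out[15], 0);
    }
}
```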
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+4]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
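In the two-source form, bit 4 of each index (`idx[i+4]`) chooses between the "a" and "b" tables and the low four bits pick the lane, so the two registers behave like one 32-entry lookup table. A sketch, assuming the unmasked entry is `_mm512_permutex2var_epi32`:

```
#[cfg(target_arch = "x86_64")]
fn permutex2var_demo() {
    use std::arch::x86_64::*;
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return; // AVX-512F not available on this CPU
    }
    unsafe {
        let a = _mm512_set1_epi32(100);
        let b = _mm512_set1_epi32(200);
        // indices 0..15 read from `a`, indices 16..31 read from `b`
        let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
        let r = _mm512_permutex2var_epi32(a, idx, b);
        let mut out = [0i32; 16];
        _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
        assert_eq!(&out[..4], &[100, 200, 100, 200]); // interleaved a/b elements
    }
}
```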
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512F + 
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := (idx[i+3]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := idx[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := (idx[i+4]) ? b[off+31:off] : a[off+31:off] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + off := idx[i+3:i]*32 + dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := idx[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := (idx[i+3]) ? b[off+63:off] : a[off+63:off] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + off := idx[i+2:i]*64 + dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI +IF (imm8[4] == 0) tmp_dst[319:256] := a[319:256]; FI +IF (imm8[4] == 1) tmp_dst[319:256] := a[383:320]; FI +IF (imm8[5] == 0) tmp_dst[383:320] := a[319:256]; FI +IF (imm8[5] == 1) tmp_dst[383:320] := a[383:320]; FI +IF (imm8[6] == 0) tmp_dst[447:384] := a[447:384]; FI +IF (imm8[6] == 1) tmp_dst[447:384] := a[511:448]; FI +IF (imm8[7] == 0) tmp_dst[511:448] := a[447:384]; FI +IF (imm8[7] == 1) tmp_dst[511:448] := a[511:448]; FI +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI +IF (b[257] == 0) tmp_dst[319:256] := a[319:256]; FI +IF (b[257] == 1) tmp_dst[319:256] := a[383:320]; FI +IF (b[321] == 0) tmp_dst[383:320] := a[319:256]; FI +IF (b[321] == 1) tmp_dst[383:320] := a[383:320]; FI +IF (b[385] == 0) tmp_dst[447:384] := a[447:384]; FI +IF (b[385] == 1) tmp_dst[447:384] := a[511:448]; FI +IF (b[449] == 0) tmp_dst[511:448] := a[447:384]; FI +IF (b[449] == 1) tmp_dst[511:448] := a[511:448]; FI +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI +IF (imm8[4] == 0) tmp_dst[319:256] := a[319:256]; FI +IF (imm8[4] == 1) tmp_dst[319:256] := a[383:320]; FI +IF (imm8[5] == 0) tmp_dst[383:320] := a[319:256]; FI +IF (imm8[5] == 1) tmp_dst[383:320] := a[383:320]; FI +IF (imm8[6] == 0) tmp_dst[447:384] := a[447:384]; FI +IF (imm8[6] == 1) tmp_dst[447:384] := a[511:448]; FI +IF (imm8[7] == 0) tmp_dst[511:448] := a[447:384]; FI +IF (imm8[7] == 1) tmp_dst[511:448] := a[511:448]; FI +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI +IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI +IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI +IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI +IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI +IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI +IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI +IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI +IF (b[257] == 0) tmp_dst[319:256] := a[319:256]; FI +IF (b[257] == 1) tmp_dst[319:256] := a[383:320]; FI +IF (b[321] == 0) tmp_dst[383:320] := a[319:256]; FI +IF (b[321] == 1) tmp_dst[383:320] := a[383:320]; FI +IF (b[385] == 0) tmp_dst[447:384] := a[447:384]; FI +IF (b[385] == 1) tmp_dst[447:384] := a[511:448]; FI +IF (b[449] == 0) tmp_dst[511:448] := a[447:384]; FI +IF (b[449] == 1) tmp_dst[511:448] := a[511:448]; FI +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI +IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI +IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI +IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI +IF (imm8[2] == 0) dst[191:128] := a[191:128]; FI +IF (imm8[2] == 1) dst[191:128] := a[255:192]; FI +IF (imm8[3] == 0) dst[255:192] := a[191:128]; FI +IF (imm8[3] == 1) dst[255:192] := a[255:192]; FI +IF (imm8[4] == 0) dst[319:256] := a[319:256]; FI +IF (imm8[4] == 1) dst[319:256] := a[383:320]; FI +IF (imm8[5] == 0) dst[383:320] := a[319:256]; FI +IF (imm8[5] == 1) dst[383:320] := a[383:320]; FI +IF (imm8[6] == 0) dst[447:384] := a[447:384]; FI +IF (imm8[6] == 1) dst[447:384] := a[511:448]; FI +IF (imm8[7] == 0) dst[511:448] := a[447:384]; FI +IF (imm8[7] == 1) dst[511:448] := a[511:448]; FI +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". + +IF (b[1] == 0) dst[63:0] := a[63:0]; FI +IF (b[1] == 1) dst[63:0] := a[127:64]; FI +IF (b[65] == 0) dst[127:64] := a[63:0]; FI +IF (b[65] == 1) dst[127:64] := a[127:64]; FI +IF (b[129] == 0) dst[191:128] := a[191:128]; FI +IF (b[129] == 1) dst[191:128] := a[255:192]; FI +IF (b[193] == 0) dst[255:192] := a[191:128]; FI +IF (b[193] == 1) dst[255:192] := a[255:192]; FI +IF (b[257] == 0) dst[319:256] := a[319:256]; FI +IF (b[257] == 1) dst[319:256] := a[383:320]; FI +IF (b[321] == 0) dst[383:320] := a[319:256]; FI +IF (b[321] == 1) dst[383:320] := a[383:320]; FI +IF (b[385] == 0) dst[447:384] := a[447:384]; FI +IF (b[385] == 1) dst[447:384] := a[511:448]; FI +IF (b[449] == 0) dst[511:448] := a[447:384]; FI +IF (b[449] == 1) dst[511:448] := a[511:448]; FI +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
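This excerpt strips the intrinsic prototypes, but the operation pseudocode of the five entries above matches Intel's in-lane double-precision permutes. A minimal C sketch under that assumption (the names _mm512_permute_pd and _mm512_mask_permutevar_pd are taken from the Intrinsics Guide, not from this diff; compile with -mavx512f):

#include <immintrin.h>

/* Swap the two doubles inside each 128-bit lane. Per the pseudocode,
   imm8 bit 2*lane selects the low slot's source and bit 2*lane+1 the
   high slot's, so 0x55 = 0b01010101 swaps every lane. */
static __m512d swap_pairs(__m512d a) {
    return _mm512_permute_pd(a, 0x55);
}

/* Variable form with a writemask: bit 1 of each 64-bit element of b picks
   the source element; slots with a clear mask bit keep src. */
static __m512d masked_var_swap(__m512d src, __mmask8 k, __m512d a, __m512i b) {
    return _mm512_mask_permutevar_pd(src, k, a, b);
}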
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) +tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) +tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) +tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) +tmp_dst[287:256] := SELECT4(a[383:256], b[257:256]) +tmp_dst[319:288] := SELECT4(a[383:256], b[289:288]) +tmp_dst[351:320] := SELECT4(a[383:256], b[321:320]) +tmp_dst[383:352] := SELECT4(a[383:256], b[353:352]) +tmp_dst[415:384] := SELECT4(a[511:384], b[385:384]) +tmp_dst[447:416] := SELECT4(a[511:384], b[417:416]) +tmp_dst[479:448] := SELECT4(a[511:384], b[449:448]) +tmp_dst[511:480] := SELECT4(a[511:384], b[481:480]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) +tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) +tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) +tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) +tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) +tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) +tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) +tmp_dst[287:256] := SELECT4(a[383:256], b[257:256]) +tmp_dst[319:288] := SELECT4(a[383:256], b[289:288]) +tmp_dst[351:320] := SELECT4(a[383:256], b[321:320]) +tmp_dst[383:352] := SELECT4(a[383:256], b[353:352]) +tmp_dst[415:384] := SELECT4(a[511:384], b[385:384]) +tmp_dst[447:416] := SELECT4(a[511:384], b[417:416]) +tmp_dst[479:448] := SELECT4(a[511:384], b[449:448]) +tmp_dst[511:480] := SELECT4(a[511:384], b[481:480]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], b[1:0]) +dst[63:32] := SELECT4(a[127:0], b[33:32]) +dst[95:64] := SELECT4(a[127:0], b[65:64]) +dst[127:96] := SELECT4(a[127:0], b[97:96]) +dst[159:128] := SELECT4(a[255:128], b[129:128]) +dst[191:160] := SELECT4(a[255:128], b[161:160]) +dst[223:192] := SELECT4(a[255:128], b[193:192]) +dst[255:224] := SELECT4(a[255:128], b[225:224]) +dst[287:256] := SELECT4(a[383:256], b[257:256]) +dst[319:288] := SELECT4(a[383:256], b[289:288]) +dst[351:320] := SELECT4(a[383:256], b[321:320]) +dst[383:352] := SELECT4(a[383:256], b[353:352]) +dst[415:384] := SELECT4(a[511:384], b[385:384]) +dst[447:416] := SELECT4(a[511:384], b[417:416]) +dst[479:448] := SELECT4(a[511:384], b[449:448]) +dst[511:480] := SELECT4(a[511:384], b[481:480]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
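The single-precision forms above follow the same pattern; a sketch assuming the usual _mm512_permute_ps / _mm512_permutevar_ps names (inferred, not shown in this excerpt):

#include <immintrin.h>

/* Reverse the four floats of each 128-bit lane: slot j is chosen by
   imm8[2j+1:2j], so _MM_SHUFFLE(0, 1, 2, 3) = 0x1B reverses per lane. */
static __m512 reverse_within_lanes(__m512 a) {
    return _mm512_permute_ps(a, _MM_SHUFFLE(0, 1, 2, 3));
}

/* Variable form: bits [1:0] of each 32-bit element of b select the source
   element within the same 128-bit lane of a. */
static __m512 var_permute(__m512 a, __m512i b) {
    return _mm512_permutevar_ps(a, b);
}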
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + dst[i+63:i] := a[id+63:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
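For the 256-bit-lane and cross-lane qword permutes above, a sketch assuming they correspond to _mm512_permutex_pd and _mm512_permutexvar_pd (an assumption based on the matching pseudocode):

#include <immintrin.h>

/* Rotate the four doubles of each 256-bit lane down by one:
   _MM_SHUFFLE(0, 3, 2, 1) = 0x39. */
static __m512d rotate_lanes(__m512d a) {
    return _mm512_permutex_pd(a, _MM_SHUFFLE(0, 3, 2, 1));
}

/* Full cross-lane permute; each 64-bit slot of idx supplies a 3-bit index
   into a. Note that idx is the first argument. */
static __m512d reverse_all(__m512d a) {
    const __m512i idx = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    return _mm512_permutexvar_pd(idx, a);
}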
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". +
FOR j := 0 to 15
	i := j*32
	id := idx[i+3:i]*32
	dst[i+31:i] := a[id+31:id]
ENDFOR
dst[MAX:512] := 0


	AVX512F
immintrin.h
+ Swizzle +
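Presumably the _mm512_permutexvar_ps family; one hedged example of the zeromasked form:

#include <immintrin.h>

/* Duplicate the even-indexed floats into both halves of the result; each
   32-bit slot of idx contributes its low 4 bits as the source index, and
   slots whose mask bit is clear are zeroed by the maskz form. */
static __m512 gather_evens(__mmask16 k, __m512 a) {
    const __m512i idx = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14,
                                          0, 2, 4, 6, 8, 10, 12, 14);
    return _mm512_maskz_permutexvar_ps(k, idx, a);
}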
+ + + + + + + Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + IF k[j] + dst[i+63:i] := a[id+63:id] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[63:0] := src[63:0] + 1: tmp[63:0] := src[127:64] + 2: tmp[63:0] := src[191:128] + 3: tmp[63:0] := src[255:192] + ESAC + RETURN tmp[63:0] +} +dst[63:0] := SELECT4(a[255:0], imm8[1:0]) +dst[127:64] := SELECT4(a[255:0], imm8[3:2]) +dst[191:128] := SELECT4(a[255:0], imm8[5:4]) +dst[255:192] := SELECT4(a[255:0], imm8[7:6]) +dst[319:256] := SELECT4(a[511:256], imm8[1:0]) +dst[383:320] := SELECT4(a[511:256], imm8[3:2]) +dst[447:384] := SELECT4(a[511:256], imm8[5:4]) +dst[511:448] := SELECT4(a[511:256], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + id := idx[i+2:i]*64 + dst[i+63:i] := a[id+63:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
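The integer counterparts, again assuming the standard names (_mm512_permutex_epi64 / _mm512_permutexvar_epi64):

#include <immintrin.h>

/* Broadcast the lowest qword of each 256-bit lane across that lane. */
static __m512i splat_lane_low(__m512i a) {
    return _mm512_permutex_epi64(a, _MM_SHUFFLE(0, 0, 0, 0));
}

/* Cross-lane qword permute: interleave the low and high halves of a. */
static __m512i interleave_halves(__m512i a) {
    const __m512i idx = _mm512_setr_epi64(0, 4, 1, 5, 2, 6, 3, 7);
    return _mm512_permutexvar_epi64(idx, a);
}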
+ + + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[m+31:m] + m := m + 32 + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[m+63:m] + m := m + 64 + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
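Expansion is the inverse of compression: a packed prefix of the source is scattered to the mask's set bits. A sketch assuming the _mm512_maskz_expand_epi32 name:

#include <immintrin.h>

/* The low popcount(k) elements of `packed` are deposited at the set-bit
   positions of k; cleared positions become zero in the maskz form.
   E.g. k = 0b0101 places element 0 in slot 0 and element 1 in slot 2. */
static __m512i expand_sparse(__mmask16 k, __m512i packed) {
    return _mm512_maskz_expand_epi32(k, packed);
}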
+ + + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
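A sketch for the zeromasked dword shuffle above, assuming it is _mm512_maskz_shuffle_epi32:

#include <immintrin.h>

/* Swap the 64-bit halves of every 128-bit lane, zeroing masked-off dwords.
   GCC/Clang spell the selector 0x4E as the enumerator _MM_PERM_BADC. */
static __m512i swap_lane_halves(__mmask16 k, __m512i a) {
    return _mm512_maskz_shuffle_epi32(k, a, _MM_PERM_BADC);
}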
+ + + + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
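For the integer high unpacks above, a sketch assuming the standard _mm512_unpackhi_epi32 name:

#include <immintrin.h>

/* Per 128-bit lane, interleave the high two dwords of a and b: with dword
   elements a = {a0..a15}, lane 0 of the result is {a2, b2, a3, b3}. */
static __m512i zip_high_dwords(__m512i a, __m512i b) {
    return _mm512_unpackhi_epi32(a, b);
}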
+ + + + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
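And the low unpacks, assuming _mm512_unpacklo_epi64:

#include <immintrin.h>

/* Per 128-bit lane, pair the low qwords of a and b, giving
   {a0, b0, a2, b2, a4, b4, a6, b6} for qword elements. */
static __m512i zip_low_qwords(__m512i a, __m512i b) {
    return _mm512_unpacklo_epi64(a, b);
}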
+ + + + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[127:0] := src[127:0] + 1: tmp[127:0] := src[255:128] + 2: tmp[127:0] := src[383:256] + 3: tmp[127:0] := src[511:384] + ESAC + RETURN tmp[127:0] +} +dst[127:0] := SELECT4(a[511:0], imm8[1:0]) +dst[255:128] := SELECT4(a[511:0], imm8[3:2]) +dst[383:256] := SELECT4(b[511:0], imm8[5:4]) +dst[511:384] := SELECT4(b[511:0], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
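The twelve entries above shuffle at 128-bit granularity; a sketch assuming the _mm512_shuffle_i32x4 spelling (the f32x4/f64x2/i64x2 forms differ only in element type):

#include <immintrin.h>

/* 128-bit-lane shuffle: imm8[3:0] picks two lanes of a for the low half,
   imm8[7:4] two lanes of b for the high half. _MM_SHUFFLE(3, 2, 1, 0) =
   0xE4 concatenates a's low 256 bits with b's high 256 bits. */
static __m512i splice_halves(__m512i a, __m512i b) {
    return _mm512_shuffle_i32x4(a, b, _MM_SHUFFLE(3, 2, 1, 0));
}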
+ + + + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +tmp_dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320] +tmp_dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320] +tmp_dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448] +tmp_dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448] +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +tmp_dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320] +tmp_dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320] +tmp_dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448] +tmp_dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448] +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst". + +dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] +dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] +dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320] +dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320] +dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448] +dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
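A sketch of the qword shuffle above, assuming it maps to _mm512_shuffle_pd:

#include <immintrin.h>

/* One control bit per output qword: each 128-bit lane of the result is
   {a[bit ? high : low], b[bit ? high : low]}. imm8 = 0 keeps both lows,
   giving {a0, b0, a2, b2, a4, b4, a6, b6}. */
static __m512d pick_lows(__m512d a, __m512d b) {
    return _mm512_shuffle_pd(a, b, 0x00);
}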
+ + + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(b[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(b[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(b[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(b[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(b[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(b[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(b[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(b[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +dst[127:96] := SELECT4(b[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(b[255:128], imm8[5:4]) +dst[255:224] := SELECT4(b[255:128], imm8[7:6]) +dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +dst[351:320] := SELECT4(b[383:256], imm8[5:4]) +dst[383:352] := SELECT4(b[383:256], imm8[7:6]) +dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +dst[479:448] := SELECT4(b[511:384], imm8[5:4]) +dst[511:480] := SELECT4(b[511:384], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
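Its float counterpart, assuming _mm512_shuffle_ps:

#include <immintrin.h>

/* Per 128-bit lane the low two slots come from a and the high two from b,
   each selected by a 2-bit field; _MM_SHUFFLE(3, 2, 1, 0) yields the lane
   pattern {a0, a1, b2, b3}. */
static __m512 mix_ps(__m512 a, __m512 b) {
    return _mm512_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0));
}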
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
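For the floating-point high unpacks, assuming _mm512_unpackhi_ps:

#include <immintrin.h>

/* Per 128-bit lane, {a2, b2, a3, b3} for the float version; the double
   version pairs the single high element of each lane instead. */
static __m512 unpack_high_ps(__m512 a, __m512 b) {
    return _mm512_unpackhi_ps(a, b);
}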
+ + + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp_dst[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) +dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) +dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) +dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
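A small runnable demo of the low unpacks, assuming _mm512_unpacklo_pd and -mavx512f; the expected interleaving follows directly from the per-lane pseudocode above:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_setr_pd(0, 1, 2, 3, 4, 5, 6, 7);
    __m512d b = _mm512_setr_pd(10, 11, 12, 13, 14, 15, 16, 17);
    double out[8];
    /* Per 128-bit lane, pair the low doubles: expect 0 10 2 12 4 14 6 16. */
    _mm512_storeu_pd(out, _mm512_unpacklo_pd(a, b));
    for (int i = 0; i < 8; i++) printf("%g ", out[i]);
    putchar('\n');
    return 0;
}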
+ + + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
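These four entries are the scalar double-precision compare-into-mask forms; the pseudocode matches _mm_cmp_sd_mask and _mm_mask_cmp_sd_mask (names assumed, since they are stripped from this hunk). A sketch showing that the result lands in a mask register value rather than a vector, and that the k1 zeromask can suppress bit 0:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_sd(1.0);
    __m128d b = _mm_set_sd(2.0);
    __mmask8 k  = _mm_cmp_sd_mask(a, b, _CMP_LT_OS);           /* 1.0 < 2.0 -> bit0 = 1 */
    __mmask8 kz = _mm_mask_cmp_sd_mask(0x0, a, b, _CMP_LT_OS); /* k1 bit 0 clear -> 0    */
    printf("%u %u\n", (unsigned)k, (unsigned)kz);              /* 1 0 */
    return 0;
}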
+ + + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +RETURN ( a[63:0] OP b[63:0] ) ? 1 : 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +RETURN ( a[31:0] OP b[31:0] ) ? 1 : 0 + + + + AVX512F +
immintrin.h
+ Compare +
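Unlike the _mask variants above, these two entries return the comparison as a plain boolean int; the signature matches _mm_comi_round_sd / _mm_comi_round_ss (assumed names). A sketch, noting that the sae argument must be _MM_FROUND_NO_EXC or _MM_FROUND_CUR_DIRECTION:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_sd(3.0);
    __m128d b = _mm_set_sd(3.0);
    /* boolean result rather than a mask register */
    int eq = _mm_comi_round_sd(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
    printf("%d\n", eq);   /* 1 */
    return 0;
}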
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+
+
+
+
+
+	Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).
+
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+
+
+	AVX512F
+
immintrin.h
+ Compare +
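Assuming the two entries above are _mm512_cmplt_epi32_mask and its zeromasked sibling _mm512_mask_cmplt_epi32_mask, a sketch of how the 16 per-element results pack into a __mmask16:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(1);
    __m512i b = _mm512_set1_epi32(2);
    __mmask16 k  = _mm512_cmplt_epi32_mask(a, b);              /* all 16 lanes: 1 < 2 */
    __mmask16 km = _mm512_mask_cmplt_epi32_mask(0x000F, a, b); /* only low 4 survive  */
    printf("0x%04x 0x%04x\n", (unsigned)k, (unsigned)km);      /* 0xffff 0x000f */
    return 0;
}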
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
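The imm8-driven form and the six named forms above compute the same thing: per the pseudocode, the generic compare with _MM_CMPINT_LE produces exactly the mask of the fixed less-than-or-equal form. A sketch (intrinsic names _mm512_cmp_epi64_mask and _mm512_cmple_epi64_mask assumed from the pseudocode):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi64(5);
    __m512i b = _mm512_set1_epi64(5);
    __mmask8 k1 = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_LE);
    __mmask8 k2 = _mm512_cmple_epi64_mask(a, b);   /* same predicate, fixed form */
    printf("0x%02x 0x%02x\n", (unsigned)k1, (unsigned)k2);   /* 0xff 0xff */
    return 0;
}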
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
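The epu64 entries above mirror the epi64 group but treat the bit patterns as unsigned, which flips the result for negative inputs. A sketch of the difference (assuming the names _mm512_cmplt_epi64_mask and _mm512_cmplt_epu64_mask):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi64(-1);   /* == 0xFFFF...FF when read as unsigned */
    __m512i b = _mm512_set1_epi64(1);
    __mmask8 ks = _mm512_cmplt_epi64_mask(a, b); /* signed:   -1 < 1   -> 0xff */
    __mmask8 ku = _mm512_cmplt_epu64_mask(a, b); /* unsigned: UINT64_MAX < 1 -> 0x00 */
    printf("0x%02x 0x%02x\n", (unsigned)ks, (unsigned)ku);
    return 0;
}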
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + m := j*64 + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := src[m+63:m] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + m := j*64 + IF k[j] + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) + ELSE + dst[m+63:m] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
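These conversions widen eight 32-bit integers (a 256-bit source) into eight doubles (a 512-bit result); the conversion is exact, so no rounding argument exists. A sketch assuming the names _mm512_cvtepi32_pd and _mm512_maskz_cvtepi32_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi32(7);
    __m512d d = _mm512_cvtepi32_pd(a);              /* widen 8 x i32 -> 8 x f64 */
    __m512d m = _mm512_maskz_cvtepi32_pd(0x0F, a);  /* upper 4 results zeroed   */
    double out[8];
    _mm512_storeu_pd(out, m);
    printf("%g %g\n", out[0], out[7]);              /* 7 0 */
    (void)d;
    return 0;
}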
+ + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
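For int32-to-float the [round_note] variant matters: integers above 2^24 are not all representable in single precision, so the rounding override is observable. A sketch assuming the name _mm512_cvt_roundepi32_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* 2^24 + 1 is not exactly representable in float; the mode decides the result */
    __m512i a = _mm512_set1_epi32((1 << 24) + 1);
    __m512 up = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    __m512 dn = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    float fu[16], fd[16];
    _mm512_storeu_ps(fu, up);
    _mm512_storeu_ps(fd, dn);
    printf("%.1f %.1f\n", fu[0], fd[0]);   /* 16777218.0 16777216.0 */
    return 0;
}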
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
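Note the narrowing here: eight doubles in a 512-bit source produce eight 32-bit integers in a 256-bit result, and without an explicit [round_note] override the conversion uses the current rounding mode (round-to-nearest-even by default). A sketch assuming the name _mm512_cvtpd_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(2.5);
    __m256i r = _mm512_cvtpd_epi32(a);   /* nearest-even: 2.5 -> 2 */
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    printf("%d\n", out[0]);              /* 2 */
    return 0;
}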
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". [sae_note] + +FOR j := 0 to 15 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
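The half-precision source is carried in a 256-bit integer vector of 16-bit values; assuming the plain form above is _mm512_cvtph_ps, a sketch that widens sixteen halves to floats:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* 0x3C00 is 1.0 in IEEE binary16 */
    __m256i half = _mm256_set1_epi16(0x3C00);
    __m512 f = _mm512_cvtph_ps(half);
    float out[16];
    _mm512_storeu_ps(out, f);
    printf("%g\n", out[0]);   /* 1 */
    return 0;
}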
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round2_note] + +FOR j := 0 to 15 + i := 16*j + l := 32*j + IF k[j] + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
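The reverse direction is lossy, which is why every entry in this group carries [round2_note] and takes a rounding immediate. A sketch assuming the unmasked form is _mm512_cvtps_ph:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 f = _mm512_set1_ps(1.0f);
    __m256i h = _mm512_cvtps_ph(f, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    unsigned short out[16];
    _mm256_storeu_si256((__m256i *)out, h);
    printf("0x%04x\n", out[0]);   /* 0x3c00, i.e. 1.0 in binary16 */
    return 0;
}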
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP64_To_Int32(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP64_To_Int64(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP64_To_Int32(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP64_To_Int64(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_Int32(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
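The scalar entries above convert only the low double of the vector, with and without a rounding override; this section lists both i32/i64 and si32/si64 spellings, so the exact names below (_mm_cvtsd_i32 and _mm_cvt_roundsd_i32) are an assumption. A sketch:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_sd(2.7);
    int near = _mm_cvtsd_i32(a);   /* current rounding mode, usually nearest -> 3 */
    int down = _mm_cvt_roundsd_i32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); /* 2 */
    printf("%d %d\n", near, down);
    return 0;
}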
+ + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP64_To_UInt32(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP64_To_UInt64(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_UInt32(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_UInt64(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the signed 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int32_To_FP64(b[31:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [sae_note] + +dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [sae_note] + +IF k[0] + dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [sae_note] + +IF k[0] + dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP32_To_Int32(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP32_To_Int64(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP32_To_Int32(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP32_To_Int64(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_Int64(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + [round_note] + +dst[31:0] := Convert_FP32_To_UInt32(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". + [round_note] + +dst[63:0] := Convert_FP32_To_UInt64(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_UInt32(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_UInt64(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
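The scalar conversions above take either an explicit rounding mode ([round_note] variants) or the current MXCSR rounding mode. A short C sketch, assuming the entries correspond to _mm_cvt_roundss_si32 and _mm_cvtss_u64 from immintrin.h:

#include <immintrin.h>

int round_low_to_i32(__m128 a)
{
    /* rounding mode is encoded in the instruction; NO_EXC suppresses exceptions */
    return _mm_cvt_roundss_si32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}

unsigned long long low_to_u64(__m128 a)
{
    return _mm_cvtss_u64(a);   /* x86-64 only; uses the current rounding mode */
}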
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 32*j + l := 64*j + IF k[j] + dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
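The entries above form the usual plain/writemask/zeromask set (each with and without [sae_note]) for the FP64-to-int32 truncating conversion. A hedged C sketch, assuming the plain and writemask forms are _mm512_cvttpd_epi32 and _mm512_mask_cvttpd_epi32:

#include <immintrin.h>

__m256i trunc_pd(__m512d v)
{
    return _mm512_cvttpd_epi32(v);                /* 8 x f64 -> 8 x i32, truncated */
}

__m256i trunc_pd_masked(__m256i src, __mmask8 k, __m512d v)
{
    return _mm512_mask_cvttpd_epi32(src, k, v);   /* unselected lanes keep src */
}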
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Convert +
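A companion C sketch for the FP32-to-(u)int32 truncating family above, assuming the plain signed and zeromasked unsigned forms are _mm512_cvttps_epi32 and _mm512_maskz_cvttps_epu32:

#include <immintrin.h>

__m512i trunc_ps(__m512 v)
{
    return _mm512_cvttps_epi32(v);            /* 16 x f32 -> 16 x i32, truncated */
}

__m512i trunc_ps_u_masked(__mmask16 k, __m512 v)
{
    return _mm512_maskz_cvttps_epu32(k, v);   /* unselected lanes become 0 */
}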
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP64_To_UInt32_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP64_To_UInt64_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_UInt32_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_UInt64_Truncate(a[63:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[31:0] := Convert_FP32_To_UInt32_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". + [sae_note] + +dst[63:0] := Convert_FP32_To_UInt64_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_UInt32_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_UInt64_Truncate(a[31:0]) + + + AVX512F +
immintrin.h
+ Convert +
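For these scalar truncating conversions the [sae_note] operand only suppresses exceptions, since truncation already fixes the rounding direction. A sketch assuming the entries include _mm_cvtt_roundsd_si32 and _mm_cvttss_u32:

#include <immintrin.h>

int trunc_low_sd(__m128d a)
{
    return _mm_cvtt_roundsd_si32(a, _MM_FROUND_NO_EXC);   /* no FP exceptions raised */
}

unsigned trunc_low_ss_u(__m128 a)
{
    return _mm_cvttss_u32(a);
}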
+ + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + l := j*32 + dst[i+63:i] := Convert_Int64_To_FP64(a[l+31:l]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[i+63:i] := Convert_Int64_To_FP64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + IF k[j] + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
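A C sketch of the unsigned-integer-to-float conversions above, assuming the plain forms are _mm512_cvtepu32_pd and _mm512_cvtepu32_ps:

#include <immintrin.h>

__m512d u32_to_pd(__m256i v)
{
    return _mm512_cvtepu32_pd(v);   /* 8 x u32 -> 8 x f64, always exact */
}

__m512 u32_to_ps(__m512i v)
{
    return _mm512_cvtepu32_ps(v);   /* 16 x u32 -> 16 x f32, may round */
}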
+ + + + + + Convert the unsigned 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int32_To_FP64(b[31:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the unsigned 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert the unsigned 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
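A sketch of the scalar unsigned-to-float converts above, assuming the non-rounding forms are _mm_cvtu32_sd and _mm_cvtu64_ss (wrapper names are made up):

#include <immintrin.h>

__m128d u32_into_sd(__m128d a, unsigned b)
{
    return _mm_cvtu32_sd(a, b);    /* low lane replaced, upper lane kept from "a" */
}

__m128 u64_into_ss(__m128 a, unsigned long long b)
{
    return _mm_cvtu64_ss(a, b);    /* x86-64 only */
}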
+ + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 8*j + dst[k+7:k] := Truncate8(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Truncate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[k+31:k] := Truncate32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Truncate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 16*j + dst[k+15:k] := Truncate16(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Truncate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
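The truncating down-converts above come in register-to-register and masked store-to-memory forms. A C sketch, assuming the epi32-to-epi8 pair is _mm512_cvtepi32_epi8 and _mm512_mask_cvtepi32_storeu_epi8:

#include <immintrin.h>

__m128i narrow_i32_to_i8(__m512i v)
{
    return _mm512_cvtepi32_epi8(v);   /* keeps only the low 8 bits of each lane */
}

void narrow_and_store(void *base_addr, __mmask16 k, __m512i v)
{
    /* writes only the bytes whose mask bit is set; base_addr may be unaligned */
    _mm512_mask_cvtepi32_storeu_epi8(base_addr, k, v);
}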
+ + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 8*j + dst[k+7:k] := Saturate8(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := Saturate8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[k+31:k] := Saturate32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := Saturate32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 16*j + dst[k+15:k] := Saturate16(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := Saturate16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
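Unlike the Truncate8/Truncate16 entries earlier, the Saturate forms above clamp out-of-range values instead of discarding high bits (e.g. 300 becomes 127 and -300 becomes -128 in the 8-bit case). A one-liner sketch, assuming the plain epi32-to-epi8 form is _mm512_cvtsepi32_epi8:

#include <immintrin.h>

/* 16 x i32 -> 16 x i8, clamped to [-128, 127] rather than wrapped */
__m128i sat_narrow_i32_to_i8(__m512i v)
{
    return _mm512_cvtsepi32_epi8(v);
}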
+ + + + Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 8*j + dst[i+31:i] := SignExtend32(a[k+7:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 8*j + dst[i+63:i] := SignExtend64(a[k+7:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[i+63:i] := SignExtend64(a[k+31:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 16*j + dst[i+31:i] := SignExtend32(a[k+15:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + l := j*16 + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := SignExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 16*j + dst[i+63:i] := SignExtend64(a[k+15:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := SignExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
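A C sketch of the sign-extending widening conversions above, assuming two of the plain forms are _mm512_cvtepi8_epi32 and _mm512_cvtepi32_epi64:

#include <immintrin.h>

__m512i widen_i8_to_i32(__m128i v)
{
    return _mm512_cvtepi8_epi32(v);    /* 16 x i8 -> 16 x i32, sign-extended */
}

__m512i widen_i32_to_i64(__m256i v)
{
    return _mm512_cvtepi32_epi64(v);   /* 8 x i32 -> 8 x i64, sign-extended */
}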
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+31:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 32-bit integers in "a" to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+31:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 8*j + dst[k+7:k] := SaturateU8(a[i+63:i]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := src[l+7:l] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[l+7:l] := SaturateU8(a[i+63:i]) + ELSE + dst[l+7:l] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[k+31:k] := SaturateU32(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[l+31:l] := SaturateU32(a[i+63:i]) + ELSE + dst[l+31:l] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 16*j + dst[k+15:k] := SaturateU16(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := src[l+15:l] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + Store + + + + + Convert packed unsigned 64-bit integers in "a" to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i]) + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[l+15:l] := SaturateU16(a[i+63:i]) + ELSE + dst[l+15:l] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Convert +
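The unsigned saturating down-converts above clamp at the top of the destination's unsigned range. A sketch, assuming the plain epi64-to-epi32 form is _mm512_cvtusepi64_epi32:

#include <immintrin.h>

/* SaturateU32 clamps to [0, 0xFFFFFFFF]; values that already fit pass through */
__m256i sat_narrow_u64_to_u32(__m512i v)
{
    return _mm512_cvtusepi64_epi32(v);
}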
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 8*j + dst[i+31:i] := ZeroExtend32(a[k+7:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 8*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+7:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". 
+
+FOR j := 0 to 7
+	i := 64*j
+	k := 8*j
+	dst[i+63:i] := ZeroExtend64(a[k+7:k])
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 8*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+7:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 32*j + dst[i+63:i] := ZeroExtend64(a[k+31:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 32*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+31:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 15 + i := 32*j + k := 16*j + dst[i+31:i] := ZeroExtend32(a[k+15:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := 32*j + l := 16*j + IF k[j] + dst[i+31:i] := ZeroExtend32(a[l+15:l]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := 64*j + k := 16*j + dst[i+63:i] := ZeroExtend64(a[k+15:k]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := 64*j + l := 16*j + IF k[j] + dst[i+63:i] := ZeroExtend64(a[l+15:l]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
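A C sketch of the zero-extending widening conversions above, assuming two of the plain forms are _mm512_cvtepu8_epi32 and _mm512_cvtepu32_epi64:

#include <immintrin.h>

__m512i widen_u8_to_u32(__m128i v)
{
    return _mm512_cvtepu8_epi32(v);    /* 16 x u8 -> 16 x u32, zero-extended */
}

__m512i widen_u32_to_u64(__m256i v)
{
    return _mm512_cvtepu32_epi64(v);   /* 8 x u32 -> 8 x u64, zero-extended */
}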
+ + + + Copy the lower single-precision (32-bit) floating-point element of "a" to "dst". + +dst[31:0] := a[31:0] + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Copy the lower double-precision (64-bit) floating-point element of "a" to "dst". + +dst[63:0] := a[63:0] + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Copy the lower 32-bit integer in "a" to "dst". + +dst[31:0] := a[31:0] + + + AVX512F +
immintrin.h
+ Convert +
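The three "copy the lower element" entries above are zero-cost extractions of lane 0. A sketch, assuming they are _mm512_cvtss_f32, _mm512_cvtsd_f64 and _mm512_cvtsi512_si32:

#include <immintrin.h>

float  low_f32(__m512  v) { return _mm512_cvtss_f32(v);     }
double low_f64(__m512d v) { return _mm512_cvtsd_f64(v);     }
int    low_i32(__m512i v) { return _mm512_cvtsi512_si32(v); }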
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
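Illustrative C sketch (assuming the packed-double maximum entries above are _mm512_max_pd and its _mm512_mask_max_pd / _mm512_maskz_max_pd variants):

    #include <immintrin.h>

    static __m512d masked_max(__m512d src, __m512d a, __m512d b) {
        // Lanes 0..3 take MAX(a, b); lanes 4..7 are copied from src,
        // mirroring the writemask pseudocode above with k = 0x0F.
        return _mm512_mask_max_pd(src, 0x0F, a, b);
    }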
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [sae_note][max_float_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [sae_note][max_float_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note][max_float_note] + +IF k[0] + dst[63:0] := MAX(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := MAX(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note][max_float_note] + +IF k[0] + dst[63:0] := MAX(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := MAX(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [sae_note][max_float_note] + +dst[63:0] := MAX(a[63:0], b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +IF k[0] + dst[31:0] := MAX(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := MAX(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +IF k[0] + dst[31:0] := MAX(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := MAX(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +dst[31:0] := MAX(a[31:0], b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
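Illustrative C sketch (assuming the scalar single-precision entries above correspond to the _mm_mask_max_ss / _mm_max_round_ss family):

    #include <immintrin.h>

    static __m128 scalar_max(__m128 src, __m128 a, __m128 b) {
        // Mask bit 0 selects MAX(a0, b0) or src0 for the low lane;
        // lanes 1..3 always come from "a", as in the pseudocode above.
        return _mm_mask_max_ss(src, 0x1, a, b);
    }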
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [sae_note][min_float_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [sae_note][min_float_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note][min_float_note] + +IF k[0] + dst[63:0] := MIN(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := MIN(a[63:0], b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note][min_float_note] + +IF k[0] + dst[63:0] := MIN(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := MIN(a[63:0], b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [sae_note][min_float_note] 

dst[63:0] := MIN(a[63:0], b[63:0])
dst[127:64] := a[127:64]
dst[MAX:128] := 0
 
 
 AVX512F 
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +IF k[0] + dst[31:0] := MIN(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := MIN(a[31:0], b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +IF k[0] + dst[31:0] := MIN(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := MIN(a[31:0], b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +dst[31:0] := MIN(a[31:0], b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ABS(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
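Illustrative C sketch (assuming the absolute-value entries above are _mm512_abs_epi32 / _mm512_abs_epi64 and their masked forms):

    #include <immintrin.h>

    static __m512i abs_dwords(__m512i v) {
        // |x| per signed 32-bit lane; note INT32_MIN maps to itself,
        // which is why the results are documented as unsigned.
        return _mm512_abs_epi32(v);
    }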
+ + + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ABS(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
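Illustrative C sketch (assuming the packed 64-bit integer entries above are the _mm512_max_epi64 / _mm512_min_epu64 family):

    #include <immintrin.h>

    static __m512i clamp_u64(__m512i v, __m512i lo, __m512i hi) {
        // Composing the unsigned max and min entries gives a lane-wise
        // clamp of v into [lo, hi].
        return _mm512_min_epu64(_mm512_max_epu64(v, lo), hi);
    }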
+ + + + + Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
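Illustrative C sketch (assuming the zeromask move entries above are _mm512_maskz_mov_ps and friends):

    #include <immintrin.h>

    static __m512 keep_even_lanes(__m512 v) {
        // 0x5555 keeps lanes 0, 2, 4, ... and zeroes the odd lanes,
        // matching the zeromask pseudocode above.
        return _mm512_maskz_mov_ps(0x5555, v);
    }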
+ + + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +tmp[191:128] := a[191:128] +tmp[255:192] := a[191:128] +tmp[319:256] := a[319:256] +tmp[383:320] := a[319:256] +tmp[447:384] := a[447:384] +tmp[511:448] := a[447:384] +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[63:0] := a[63:0] +tmp[127:64] := a[63:0] +tmp[191:128] := a[191:128] +tmp[255:192] := a[191:128] +tmp[319:256] := a[319:256] +tmp[383:320] := a[319:256] +tmp[447:384] := a[447:384] +tmp[511:448] := a[447:384] +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := tmp[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst". + +dst[63:0] := a[63:0] +dst[127:64] := a[63:0] +dst[191:128] := a[191:128] +dst[255:192] := a[191:128] +dst[319:256] := a[319:256] +dst[383:320] := a[319:256] +dst[447:384] := a[447:384] +dst[511:448] := a[447:384] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
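Illustrative C sketch (assuming the even-index duplication above is _mm512_movedup_pd / VMOVDDUP):

    #include <immintrin.h>

    static __m512d dup_even(__m512d v) {
        // Produces {v0, v0, v2, v2, v4, v4, v6, v6}, as spelled out above.
        return _mm512_movedup_pd(v);
    }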
+ + + + + Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + + Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := b[63:0] +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := b[63:0] +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +tmp[159:128] := a[191:160] +tmp[191:160] := a[191:160] +tmp[223:192] := a[255:224] +tmp[255:224] := a[255:224] +tmp[287:256] := a[319:288] +tmp[319:288] := a[319:288] +tmp[351:320] := a[383:352] +tmp[383:352] := a[383:352] +tmp[415:384] := a[447:416] +tmp[447:416] := a[447:416] +tmp[479:448] := a[511:480] +tmp[511:480] := a[511:480] +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[63:32] +tmp[63:32] := a[63:32] +tmp[95:64] := a[127:96] +tmp[127:96] := a[127:96] +tmp[159:128] := a[191:160] +tmp[191:160] := a[191:160] +tmp[223:192] := a[255:224] +tmp[255:224] := a[255:224] +tmp[287:256] := a[319:288] +tmp[319:288] := a[319:288] +tmp[351:320] := a[383:352] +tmp[383:352] := a[383:352] +tmp[415:384] := a[447:416] +tmp[447:416] := a[447:416] +tmp[479:448] := a[511:480] +tmp[511:480] := a[511:480] +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[63:32] +dst[63:32] := a[63:32] +dst[95:64] := a[127:96] +dst[127:96] := a[127:96] +dst[159:128] := a[191:160] +dst[191:160] := a[191:160] +dst[223:192] := a[255:224] +dst[255:224] := a[255:224] +dst[287:256] := a[319:288] +dst[319:288] := a[319:288] +dst[351:320] := a[383:352] +dst[383:352] := a[383:352] +dst[415:384] := a[447:416] +dst[447:416] := a[447:416] +dst[479:448] := a[511:480] +dst[511:480] := a[511:480] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +tmp[159:128] := a[159:128] +tmp[191:160] := a[159:128] +tmp[223:192] := a[223:192] +tmp[255:224] := a[223:192] +tmp[287:256] := a[287:256] +tmp[319:288] := a[287:256] +tmp[351:320] := a[351:320] +tmp[383:352] := a[351:320] +tmp[415:384] := a[415:384] +tmp[447:416] := a[415:384] +tmp[479:448] := a[479:448] +tmp[511:480] := a[479:448] +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +tmp[31:0] := a[31:0] +tmp[63:32] := a[31:0] +tmp[95:64] := a[95:64] +tmp[127:96] := a[95:64] +tmp[159:128] := a[159:128] +tmp[191:160] := a[159:128] +tmp[223:192] := a[223:192] +tmp[255:224] := a[223:192] +tmp[287:256] := a[287:256] +tmp[319:288] := a[287:256] +tmp[351:320] := a[351:320] +tmp[383:352] := a[351:320] +tmp[415:384] := a[415:384] +tmp[447:416] := a[415:384] +tmp[479:448] := a[479:448] +tmp[511:480] := a[479:448] +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[31:0] +dst[63:32] := a[31:0] +dst[95:64] := a[95:64] +dst[127:96] := a[95:64] +dst[159:128] := a[159:128] +dst[191:160] := a[159:128] +dst[223:192] := a[223:192] +dst[255:224] := a[223:192] +dst[287:256] := a[287:256] +dst[319:288] := a[287:256] +dst[351:320] := a[351:320] +dst[383:352] := a[351:320] +dst[415:384] := a[415:384] +dst[447:416] := a[415:384] +dst[479:448] := a[479:448] +dst[511:480] := a[479:448] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
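Illustrative C sketch (assuming the two single-precision duplication families above are _mm512_movehdup_ps and _mm512_moveldup_ps):

    #include <immintrin.h>

    static __m512 dup_pairs(__m512 v) {
        // moveldup broadcasts each even lane over its odd neighbour;
        // movehdup does the opposite.
        return _mm512_moveldup_ps(v);
    }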
+ + + + + + + Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := b[31:0] +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := b[31:0] +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
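Illustrative C sketch (assuming the zeromask logical entries above are _mm512_maskz_and_epi32, _mm512_maskz_andnot_epi32, _mm512_maskz_or_epi32, and so on):

    #include <immintrin.h>

    static __m512i masked_and(__mmask16 k, __m512i a, __m512i b) {
        // Lane j gets a[j] AND b[j] when k[j] is set, else 0.
        return _mm512_maskz_and_epi32(k, a, b);
    }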
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "src" when the corresponding mask bit is not set). 

DEFINE TernaryOP(imm8, a, b, c) {
	CASE imm8[7:0] OF
	0: dst[0] := 0                   // imm8[7:0] := 0
	1: dst[0] := NOT (a OR b OR c)   // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C)
	// ...
	254: dst[0] := a OR b OR c       // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C
	255: dst[0] := 1                 // imm8[7:0] := 1
	ESAC
}
imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C)
FOR j := 0 to 15
	i := j*32
	IF k[j]
		FOR h := 0 to 31
			dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h])
		ENDFOR
	ELSE
		dst[i+31:i] := src[i+31:i]
	FI
ENDFOR
dst[MAX:512] := 0
 
 
 AVX512F 
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 15 + i := j*32 + IF k[j] + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 15 + i := j*32 + FOR h := 0 to 31 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "src" when the corresponding mask bit is not set). 

DEFINE TernaryOP(imm8, a, b, c) {
	CASE imm8[7:0] OF
	0: dst[0] := 0                   // imm8[7:0] := 0
	1: dst[0] := NOT (a OR b OR c)   // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C)
	// ...
	254: dst[0] := a OR b OR c       // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C
	255: dst[0] := 1                 // imm8[7:0] := 1
	ESAC
}
imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C)
FOR j := 0 to 7
	i := j*64
	IF k[j]
		FOR h := 0 to 63
			dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h])
		ENDFOR
	ELSE
		dst[i+63:i] := src[i+63:i]
	FI
ENDFOR
dst[MAX:512] := 0
 
 
 AVX512F 
immintrin.h
+ Logical +
+ + + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*64 + IF k[j] + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". + +DEFINE TernaryOP(imm8, a, b, c) { + CASE imm8[7:0] OF + 0: dst[0] := 0 // imm8[7:0] := 0 + 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) + // ... + 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C + 255: dst[0] := 1 // imm8[7:0] := 1 + ESAC +} +imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) +FOR j := 0 to 7 + i := j*64 + FOR h := 0 to 63 + dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
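Illustrative C sketch (assuming the entries above are the _mm512_ternarylogic_epi32/epi64 family). The "imm8" acts as an 8-entry truth table indexed per bit by (a_bit << 2) | (b_bit << 1) | c_bit; 0xCA encodes the classic bitwise select a ? b : c:

    #include <immintrin.h>

    static __m512i bit_select(__m512i sel, __m512i x, __m512i y) {
        // For each bit: result = sel ? x : y, i.e. (sel & x) | (~sel & y).
        return _mm512_ternarylogic_epi64(sel, x, y, 0xCA);
    }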
+ + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. 

FOR j := 0 to 7
	i := j*64
	IF k1[j]
		k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:8] := 0
 
 
 AVX512F 
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 7 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. 

FOR j := 0 to 15
	i := j*32
	IF k1[j]
		k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:16] := 0
 
 
 AVX512F 
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 15 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. 

FOR j := 0 to 7
	i := j*64
	IF k1[j]
		k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0
	ELSE
		k[j] := 0
	FI
ENDFOR
k[MAX:8] := 0
 
 
 AVX512F 
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. + +FOR j := 0 to 7 + i := j*64 + k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Logical +
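Illustrative C sketch (assuming the mask-producing test entries above are _mm512_test_epi64_mask, _mm512_testn_epi32_mask, etc.):

    #include <immintrin.h>

    static int any_bit_set(__m512i v) {
        // test(v, v) sets mask bit j iff lane j is non-zero, so a
        // non-zero result mask means some lane had a bit set.
        return _mm512_test_epi64_mask(v, v) != 0;
    }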
+ + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + Broadcast 8-bit integer "a" to all elements of "dst". + +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + + + Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + + Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[31:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + Broadcast 32-bit integer "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
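Illustrative C sketch (assuming the 32-bit broadcast entries above are _mm512_set1_epi32 and its mask/maskz forms):

    #include <immintrin.h>

    static __m512i splat_low_half(int x) {
        // Broadcast x into lanes 0..7 and zero lanes 8..15.
        return _mm512_maskz_set1_epi32(0x00FF, x);
    }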
+ + + + + + Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + + Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[63:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + Broadcast 64-bit integer "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + + Broadcast the low packed 16-bit integer from "a" to all elements of "dst". 

FOR j := 0 to 31
	i := j*16
	dst[i+15:i] := a[15:0]
ENDFOR
dst[MAX:512] := 0
 
 
 AVX512F 
immintrin.h
+ Set +
+ + + + Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed 32-bit integers in "dst" with the repeated 4 element sequence. + +dst[31:0] := a +dst[63:32] := b +dst[95:64] := c +dst[127:96] := d +dst[159:128] := a +dst[191:160] := b +dst[223:192] := c +dst[255:224] := d +dst[287:256] := a +dst[319:288] := b +dst[351:320] := c +dst[383:352] := d +dst[415:384] := a +dst[447:416] := b +dst[479:448] := c +dst[511:480] := d +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed 64-bit integers in "dst" with the repeated 4 element sequence. + +dst[63:0] := a +dst[127:64] := b +dst[191:128] := c +dst[255:192] := d +dst[319:256] := a +dst[383:320] := b +dst[447:384] := c +dst[511:448] := d +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the repeated 4 element sequence. + +dst[63:0] := a +dst[127:64] := b +dst[191:128] := c +dst[255:192] := d +dst[319:256] := a +dst[383:320] := b +dst[447:384] := c +dst[511:448] := d +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the repeated 4 element sequence. + +dst[31:0] := a +dst[63:32] := b +dst[95:64] := c +dst[127:96] := d +dst[159:128] := a +dst[191:160] := b +dst[223:192] := c +dst[255:224] := d +dst[287:256] := a +dst[319:288] := b +dst[351:320] := c +dst[383:352] := d +dst[415:384] := a +dst[447:416] := b +dst[479:448] := c +dst[511:480] := d +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values. + +dst[7:0] := e0 +dst[15:8] := e1 +dst[23:16] := e2 +dst[31:24] := e3 +dst[39:32] := e4 +dst[47:40] := e5 +dst[55:48] := e6 +dst[63:56] := e7 +dst[71:64] := e8 +dst[79:72] := e9 +dst[87:80] := e10 +dst[95:88] := e11 +dst[103:96] := e12 +dst[111:104] := e13 +dst[119:112] := e14 +dst[127:120] := e15 +dst[135:128] := e16 +dst[143:136] := e17 +dst[151:144] := e18 +dst[159:152] := e19 +dst[167:160] := e20 +dst[175:168] := e21 +dst[183:176] := e22 +dst[191:184] := e23 +dst[199:192] := e24 +dst[207:200] := e25 +dst[215:208] := e26 +dst[223:216] := e27 +dst[231:224] := e28 +dst[239:232] := e29 +dst[247:240] := e30 +dst[255:248] := e31 +dst[263:256] := e32 +dst[271:264] := e33 +dst[279:272] := e34 +dst[287:280] := e35 +dst[295:288] := e36 +dst[303:296] := e37 +dst[311:304] := e38 +dst[319:312] := e39 +dst[327:320] := e40 +dst[335:328] := e41 +dst[343:336] := e42 +dst[351:344] := e43 +dst[359:352] := e44 +dst[367:360] := e45 +dst[375:368] := e46 +dst[383:376] := e47 +dst[391:384] := e48 +dst[399:392] := e49 +dst[407:400] := e50 +dst[415:408] := e51 +dst[423:416] := e52 +dst[431:424] := e53 +dst[439:432] := e54 +dst[447:440] := e55 +dst[455:448] := e56 +dst[463:456] := e57 +dst[471:464] := e58 +dst[479:472] := e59 +dst[487:480] := e60 +dst[495:488] := e61 +dst[503:496] := e62 +dst[511:504] := e63 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed 16-bit integers in "dst" with the supplied values. + +dst[15:0] := e0 +dst[31:16] := e1 +dst[47:32] := e2 +dst[63:48] := e3 +dst[79:64] := e4 +dst[95:80] := e5 +dst[111:96] := e6 +dst[127:112] := e7 +dst[143:128] := e8 +dst[159:144] := e9 +dst[175:160] := e10 +dst[191:176] := e11 +dst[207:192] := e12 +dst[223:208] := e13 +dst[239:224] := e14 +dst[255:240] := e15 +dst[271:256] := e16 +dst[287:272] := e17 +dst[303:288] := e18 +dst[319:304] := e19 +dst[335:320] := e20 +dst[351:336] := e21 +dst[367:352] := e22 +dst[383:368] := e23 +dst[399:384] := e24 +dst[415:400] := e25 +dst[431:416] := e26 +dst[447:432] := e27 +dst[463:448] := e28 +dst[479:464] := e29 +dst[495:480] := e30 +dst[511:496] := e31 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 32-bit integers in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 +dst[159:128] := e4 +dst[191:160] := e5 +dst[223:192] := e6 +dst[255:224] := e7 +dst[287:256] := e8 +dst[319:288] := e9 +dst[351:320] := e10 +dst[383:352] := e11 +dst[415:384] := e12 +dst[447:416] := e13 +dst[479:448] := e14 +dst[511:480] := e15 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed 64-bit integers in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 +dst[191:128] := e2 +dst[255:192] := e3 +dst[319:256] := e4 +dst[383:320] := e5 +dst[447:384] := e6 +dst[511:448] := e7 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 +dst[191:128] := e2 +dst[255:192] := e3 +dst[319:256] := e4 +dst[383:320] := e5 +dst[447:384] := e6 +dst[511:448] := e7 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 +dst[159:128] := e4 +dst[191:160] := e5 +dst[223:192] := e6 +dst[255:224] := e7 +dst[287:256] := e8 +dst[319:288] := e9 +dst[351:320] := e10 +dst[383:352] := e11 +dst[415:384] := e12 +dst[447:416] := e13 +dst[479:448] := e14 +dst[511:480] := e15 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
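Illustrative C sketch (assuming the "supplied values" entries above are the _mm512_set_* family). Note the argument order: the first parameter is the highest element and the last parameter lands in dst[31:0] (e0 in the pseudocode above):

    #include <immintrin.h>

    static __m512i iota(void) {
        // Lane 0 holds 0, lane 15 holds 15.
        return _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8,
                                7, 6, 5, 4, 3, 2, 1, 0);
    }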
+ + + + + + + Set packed 32-bit integers in "dst" with the repeated 4 element sequence in reverse order. + +dst[31:0] := d +dst[63:32] := c +dst[95:64] := b +dst[127:96] := a +dst[159:128] := d +dst[191:160] := c +dst[223:192] := b +dst[255:224] := a +dst[287:256] := d +dst[319:288] := c +dst[351:320] := b +dst[383:352] := a +dst[415:384] := d +dst[447:416] := c +dst[479:448] := b +dst[511:480] := a +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed 64-bit integers in "dst" with the repeated 4 element sequence in reverse order. + +dst[63:0] := d +dst[127:64] := c +dst[191:128] := b +dst[255:192] := a +dst[319:256] := d +dst[383:320] := c +dst[447:384] := b +dst[511:448] := a +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the repeated 4 element sequence in reverse order. + +dst[63:0] := d +dst[127:64] := c +dst[191:128] := b +dst[255:192] := a +dst[319:256] := d +dst[383:320] := c +dst[447:384] := b +dst[511:448] := a +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the repeated 4 element sequence in reverse order. + +dst[31:0] := d +dst[63:32] := c +dst[95:64] := b +dst[127:96] := a +dst[159:128] := d +dst[191:160] := c +dst[223:192] := b +dst[255:224] := a +dst[287:256] := d +dst[319:288] := c +dst[351:320] := b +dst[383:352] := a +dst[415:384] := d +dst[447:416] := c +dst[479:448] := b +dst[511:480] := a +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 32-bit integers in "dst" with the supplied values in reverse order. + +dst[31:0] := e15 +dst[63:32] := e14 +dst[95:64] := e13 +dst[127:96] := e12 +dst[159:128] := e11 +dst[191:160] := e10 +dst[223:192] := e9 +dst[255:224] := e8 +dst[287:256] := e7 +dst[319:288] := e6 +dst[351:320] := e5 +dst[383:352] := e4 +dst[415:384] := e3 +dst[447:416] := e2 +dst[479:448] := e1 +dst[511:480] := e0 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
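The "supplied values in reverse order" entries are the setr counterparts (assuming _mm512_setr_epi32 here): the first argument goes to the lowest lane, matching the layout of an in-memory array:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* "Reverse order" relative to _mm512_set_epi32: argument 0 -> lane 0. */
    __m512i v = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
                                  8, 9, 10, 11, 12, 13, 14, 15);
    int out[16];
    _mm512_storeu_si512(out, v);
    printf("%d %d\n", out[0], out[15]); /* prints "0 15" */
    return 0;
}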
+ + + + + + + + + + + Set packed 64-bit integers in "dst" with the supplied values in reverse order. + +dst[63:0] := e7 +dst[127:64] := e6 +dst[191:128] := e5 +dst[255:192] := e4 +dst[319:256] := e3 +dst[383:320] := e2 +dst[447:384] := e1 +dst[511:448] := e0 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[63:0] := e7 +dst[127:64] := e6 +dst[191:128] := e5 +dst[255:192] := e4 +dst[319:256] := e3 +dst[383:320] := e2 +dst[447:384] := e1 +dst[511:448] := e0 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[31:0] := e15 +dst[63:32] := e14 +dst[95:64] := e13 +dst[127:96] := e12 +dst[159:128] := e11 +dst[191:160] := e10 +dst[223:192] := e9 +dst[255:224] := e8 +dst[287:256] := e7 +dst[319:288] := e6 +dst[351:320] := e5 +dst[383:352] := e4 +dst[415:384] := e3 +dst[447:416] := e2 +dst[479:448] := e1 +dst[511:480] := e0 +dst[MAX:512] := 0 + + AVX512F +
immintrin.h
+ Set +
+ + + + Return vector of type __m512 with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + Return vector of type __m512i with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + Return vector of type __m512d with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + Return vector of type __m512 with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512F +
immintrin.h
+ Set +
+ + + Return vector of type __m512i with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512F +
immintrin.h
+ Set +
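The zero-return entries (presumably _mm512_setzero_ps / _mm512_setzero_si512 / _mm512_setzero_pd and their aliases) are the usual way to start an accumulator; a sketch assuming _mm512_setzero_pd:

#include <immintrin.h>
#include <stddef.h>

/* Accumulate a dot product 8 doubles at a time; the zeroed vector
   typically compiles to a single vxorpd. Tail handling omitted. */
static inline __m512d dot_acc(const double *x, const double *y, size_t n) {
    __m512d acc = _mm512_setzero_pd();
    for (size_t i = 0; i + 8 <= n; i += 8) {
        acc = _mm512_fmadd_pd(_mm512_loadu_pd(x + i),
                              _mm512_loadu_pd(y + i), acc);
    }
    return acc; /* reduce with _mm512_reduce_add_pd(acc) if a scalar is needed */
}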
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
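The three rotate-left-by-immediate entries above follow the plain/writemask/zeromask pattern (assuming _mm512_rol_epi32, _mm512_mask_rol_epi32, _mm512_maskz_rol_epi32); this sketch contrasts the two masking behaviours:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a   = _mm512_set1_epi32(0x80000001);
    __m512i src = _mm512_set1_epi32(-1);
    __mmask16 k = 0x00FF; /* only the low 8 lanes are active */

    __m512i rm = _mm512_mask_rol_epi32(src, k, a, 1); /* inactive lanes keep src */
    __m512i rz = _mm512_maskz_rol_epi32(k, a, 1);     /* inactive lanes become 0 */

    int out[16];
    _mm512_storeu_si512(out, rm);
    printf("%08x %08x\n", (unsigned)out[0], (unsigned)out[15]); /* 00000003 ffffffff */
    _mm512_storeu_si512(out, rz);
    printf("%08x %08x\n", (unsigned)out[0], (unsigned)out[15]); /* 00000003 00000000 */
    return 0;
}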
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_DWORDS(src, count_src) { + count := count_src % 32 + RETURN (src << count) OR (src >> (32 - count)) +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
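The variable-count form (assuming _mm512_rolv_epi32) takes a per-lane rotate count, reduced modulo 32, from the second operand:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(1);
    __m512i b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
                                  8, 9, 10, 11, 12, 13, 14, 15);
    __m512i r = _mm512_rolv_epi32(a, b); /* lane j rotated left by j */
    int out[16];
    _mm512_storeu_si512(out, r);
    printf("%d %d %d\n", out[0], out[1], out[4]); /* 1 2 16 */
    return 0;
}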
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE LEFT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src << count) OR (src >> (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+
+
+
+
+
+
+
+	Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src >> count) OR (src << (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Shift +
+
+
+
+
+
+
+	Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src >> count) OR (src << (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Shift +
+
+
+
+
+
+	Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst".
+
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src >> count) OR (src << (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+
+
+
+
+
+
+
+	Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src >> count) OR (src << (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Shift +
+
+
+
+
+
+	Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst".
+
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src >> count) OR (src << (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Shift +
+ + + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". + +DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { + count := count_src % 64 + RETURN (src >> count) OR (src << (64 - count)) +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
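Unlike the rotates, the shift-by-"count" forms take the count from the low 64 bits of an __m128i, apply it to every lane, and zero the result for counts above the element width instead of wrapping. A sketch assuming _mm512_sll_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(1);
    __m512i r = _mm512_sll_epi32(a, _mm_cvtsi32_si128(4));  /* 1 << 4 in each lane */
    __m512i z = _mm512_sll_epi32(a, _mm_cvtsi32_si128(40)); /* count > 31 -> all zero */
    int out[16];
    _mm512_storeu_si512(out, r);
    printf("%d\n", out[0]); /* 16 */
    _mm512_storeu_si512(out, z);
    printf("%d\n", out[0]); /* 0 */
    return 0;
}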
+ + + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
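Arithmetic right shifts replicate the sign bit rather than feeding in zeros, so negative lanes stay negative; for oversized counts every bit becomes a copy of the sign. A sketch assuming _mm512_sra_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(-16);
    __m512i r = _mm512_sra_epi32(a, _mm_cvtsi32_si128(2));
    int out[16];
    _mm512_storeu_si512(out, r);
    printf("%d\n", out[0]); /* -4, not the logical-shift result */
    return 0;
}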
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) + ELSE + dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
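The per-element arithmetic shift (assuming _mm512_srav_epi64) saturates out-of-range counts to an all-sign-bit result:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a   = _mm512_set1_epi64(-256);
    __m512i cnt = _mm512_setr_epi64(0, 1, 2, 3, 4, 8, 63, 100);
    __m512i r   = _mm512_srav_epi64(a, cnt); /* count >= 64 -> all sign bits */
    long long out[8];
    _mm512_storeu_si512(out, r);
    printf("%lld %lld %lld\n", out[0], out[1], out[7]); /* -256 -128 -1 */
    return 0;
}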
+ + + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF count[i+63:i] < 64 + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
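Its logical counterpart (assuming _mm512_srlv_epi64) shifts in zeros, so an out-of-range count yields a zero lane rather than a sign fill:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a   = _mm512_set1_epi64(-1); /* all 64 bits set */
    __m512i cnt = _mm512_setr_epi64(0, 4, 32, 63, 64, 65, 1, 2);
    __m512i r   = _mm512_srlv_epi64(a, cnt);
    unsigned long long out[8];
    _mm512_storeu_si512(out, r);
    printf("%llx %llx %llx\n", out[0], out[3], out[4]); /* ffffffffffffffff 1 0 */
    return 0;
}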
+ + + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (1.0 / a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (1.0 / a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
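The 2^-14 bound corresponds to roughly 14 good bits; a quick check, assuming the packed single-precision entry is _mm512_rcp14_ps:

#include <immintrin.h>
#include <math.h>
#include <stdio.h>

int main(void) {
    __m512 x = _mm512_set1_ps(3.0f);
    __m512 r = _mm512_rcp14_ps(x); /* approximate 1/x, |rel err| < 2^-14 */
    float out[16];
    _mm512_storeu_ps(out, r);
    printf("approx 1/3 = %.8f, rel err = %.2e\n",
           out[0], fabs(out[0] * 3.0 - 1.0));
    return 0;
}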
+ + + + + + + Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[63:0] := (1.0 / b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[63:0] := (1.0 / b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +dst[63:0] := (1.0 / b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[31:0] := (1.0 / b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[31:0] := (1.0 / b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +dst[31:0] := (1.0 / b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
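Because the approximation carries only about 14 bits, a common pattern is one Newton-Raphson step on top of the seed; a sketch assuming _mm512_rsqrt14_ps (the refinement is a generic technique, not part of this entry):

#include <immintrin.h>

/* y' = y * (1.5 - 0.5*x*y*y) roughly doubles the usable precision
   of the rsqrt14 seed. */
static inline __m512 rsqrt_refined(__m512 x) {
    __m512 y   = _mm512_rsqrt14_ps(x);
    __m512 xyy = _mm512_mul_ps(_mm512_mul_ps(x, y), y);
    return _mm512_mul_ps(y, _mm512_fnmadd_ps(_mm512_set1_ps(0.5f), xyy,
                                             _mm512_set1_ps(1.5f)));
}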
+ + + + + + + Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[63:0] := (1.0 / SQRT(b[63:0])) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[63:0] := (1.0 / SQRT(b[63:0])) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. + +dst[63:0] := (1.0 / SQRT(b[63:0])) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[31:0] := (1.0 / SQRT(b[31:0])) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +IF k[0] + dst[31:0] := (1.0 / SQRT(b[31:0])) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. + +dst[31:0] := (1.0 / SQRT(b[31:0])) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := SQRT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+
+
+
+
+
+
+	Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]
+
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SQRT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := SQRT(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+
+
+
+
+
+	Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".
+	[round_note]
+
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SQRT(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Elementary Math Functions +
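For the [round_note] variants the rounding mode is an immediate, so it must be a compile-time constant; a sketch assuming _mm512_sqrt_round_pd:

#include <immintrin.h>

/* Square root rounded toward zero; _MM_FROUND_NO_EXC additionally
   suppresses floating-point exceptions (SAE). */
static inline __m512d sqrt_rtz(__m512d a) {
    return _mm512_sqrt_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}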
+ + + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := SQRT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := SQRT(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+
+
+
+
+
+	Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".
+	[round_note]
+
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SQRT(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+
+
+	AVX512F
+
immintrin.h
+ Elementary Math Functions +
+ + + + + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := SQRT(b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := SQRT(b[63:0]) +ELSE + dst[63:0] := src[63:0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + [round_note] + +IF k[0] + dst[63:0] := SQRT(b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst[63:0] := SQRT(b[63:0]) +ELSE + dst[63:0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := SQRT(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := SQRT(b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := SQRT(b[31:0]) +ELSE + dst[31:0] := src[31:0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst[31:0] := SQRT(b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst[31:0] := SQRT(b[31:0]) +ELSE + dst[31:0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := SQRT(b[31:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512F +
immintrin.h
+ Elementary Math Functions +
+ + + + Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512d to type __m128d. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512 to type __m128. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512d to type __m256d. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512 to type __m256. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512i to type __m128i. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512i to type __m256i. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Return vector of type __m512 with undefined elements. + AVX512F +
immintrin.h
+ General Support +
+ + + Return vector of type __m512i with undefined elements. + AVX512F +
immintrin.h
+ General Support +
+ + + Return vector of type __m512d with undefined elements. + AVX512F +
immintrin.h
+ General Support +
+ + + Return vector of type __m512 with undefined elements. + AVX512F +
immintrin.h
+ General Support +
+ + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := c[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := c[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] * b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] * b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] * b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] * b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst". + +FOR j := 0 to 15 + i := j*32 + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + [round_note] + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 32-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[31:0] + src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_ADD(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0 + FI +ENDFOR +dst[31:0] := REDUCE_ADD(tmp, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 64-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[63:0] + src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_ADD(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0 + FI +ENDFOR +dst[63:0] := REDUCE_ADD(tmp, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[63:0] + src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_ADD(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0 + FI +ENDFOR +dst[63:0] := REDUCE_ADD(tmp, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by addition using mask "k". Returns the sum of all active elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[31:0] + src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_ADD(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0 + FI +ENDFOR +dst[31:0] := REDUCE_ADD(tmp, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 32-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[31:0] * src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_MUL(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 1 + FI +ENDFOR +dst[31:0] := REDUCE_MUL(tmp, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed 64-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[63:0] * src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_MUL(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 1 + FI +ENDFOR +dst[63:0] := REDUCE_MUL(tmp, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by multiplication using mask "k". Returns the product of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[63:0] * src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_MUL(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 1.0 + FI +ENDFOR +dst[63:0] := REDUCE_MUL(tmp, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by multiplication using mask "k". Returns the product of all active elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[31:0] * src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_MUL(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := FP32(1.0) + FI +ENDFOR +dst[31:0] := REDUCE_MUL(tmp, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 32-bit integers in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[31:0] + src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_ADD(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_ADD(a, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 64-bit integers in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[63:0] + src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_ADD(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_ADD(a, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[63:0] + src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_ADD(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_ADD(a, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". + +DEFINE REDUCE_ADD(src, len) { + IF len == 2 + RETURN src[31:0] + src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_ADD(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_ADD(a, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 32-bit integers in "a" by multiplication. Returns the product of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[31:0] * src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_MUL(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MUL(a, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed 64-bit integers in "a" by multiplication. Returns the product of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[63:0] * src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_MUL(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MUL(a, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[63:0] * src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_MUL(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MUL(a, 8) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". + +DEFINE REDUCE_MUL(src, len) { + IF len == 2 + RETURN src[31:0] * src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_MUL(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MUL(a, 16) + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Finds the absolute value of each packed single-precision (32-bit) floating-point element in "v2", storing the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ABS(v2[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Finds the absolute value of each packed single-precision (32-bit) floating-point element in "v2", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ABS(v2[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + Finds the absolute value of each packed double-precision (64-bit) floating-point element in "v2", storing the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ABS(v2[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Finds the absolute value of each packed double-precision (64-bit) floating-point element in "v2", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ABS(v2[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 64 bytes (16 elements) in "dst". + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (32*imm8[3:0]) +dst[511:0] := temp[511:0] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 64 bytes (16 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +temp[1023:512] := a[511:0] +temp[511:0] := b[511:0] +temp[1023:0] := temp[1023:0] >> (32*imm8[3:0]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := temp[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + [sae_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. + [getmant_note][sae_note] + FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the "permutevar" name. This intrinsic is identical to "_mm512_mask_permutexvar_epi32", and it is recommended that you use that intrinsic name. + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + IF k[j] + dst[i+31:i] := a[id+31:id] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the "permutevar" name. This intrinsic is identical to "_mm512_permutexvar_epi32", and it is recommended that you use that intrinsic name. + +FOR j := 0 to 15 + i := j*32 + id := idx[i+3:i]*32 + dst[i+31:i] := a[id+31:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := tmp_dst[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) +dst[159:128] := SELECT4(a[255:128], imm8[1:0]) +dst[191:160] := SELECT4(a[255:128], imm8[3:2]) +dst[223:192] := SELECT4(a[255:128], imm8[5:4]) +dst[255:224] := SELECT4(a[255:128], imm8[7:6]) +dst[287:256] := SELECT4(a[383:256], imm8[1:0]) +dst[319:288] := SELECT4(a[383:256], imm8[3:2]) +dst[351:320] := SELECT4(a[383:256], imm8[5:4]) +dst[383:352] := SELECT4(a[383:256], imm8[7:6]) +dst[415:384] := SELECT4(a[511:384], imm8[1:0]) +dst[447:416] := SELECT4(a[511:384], imm8[3:2]) +dst[479:448] := SELECT4(a[511:384], imm8[5:4]) +dst[511:480] := SELECT4(a[511:384], imm8[7:6]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Swizzle +
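The SELECT4 pseudocode above is easy to misread, so here is a plain-C scalar model of the in-lane shuffle (no AVX-512 required) that can be used to check an `imm8` value; `shuffle_epi32_model` is an illustrative name.

```c
#include <stdint.h>

/* Scalar model of the SELECT4 pseudocode: element j of dst takes the element
   of its own 128-bit lane chosen by the 2-bit imm8 field for slot j%4. */
static void shuffle_epi32_model(uint32_t dst[16], const uint32_t a[16], uint8_t imm8)
{
    for (int j = 0; j < 16; j++) {
        int lane = j / 4;                        /* four 128-bit lanes */
        int sel  = (imm8 >> ((j % 4) * 2)) & 3;  /* SELECT4 control    */
        dst[j] = a[lane * 4 + sel];
    }
}
```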
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
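A hedged C sketch of the `imm8` predicate form above, assuming the conventional name `_mm512_cmp_pd_mask`; the `_CMP_*` constants are exactly those enumerated in the operation.

```c
#include <immintrin.h>

/* Mask bit j set when a[j] < b[j]. _CMP_LT_OQ (predicate 17) is ordered
   and quiet: NaN lanes compare false and raise no exception. */
static __mmask8 lanes_lt_pd(__m512d a, __m512d b) {
    return _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ);
}
```

The dedicated entries that follow (equality, less-than, and so on) are fixed-predicate shorthands for this same operation.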
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] == b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] <= b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] < b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] != b[i+63:i]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (!(a[i+63:i] <= b[i+63:i])) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k". + +FOR j := 0 to 7 + i := j*64 + k[j] := (!(a[i+63:i] < b[i+63:i])) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k". + FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k". + FOR j := 0 to 7 + i := j*64 + k[j] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] == b[i+63:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] <= b[i+63:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] < b[i+63:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] != b[i+63:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (!(a[i+63:i] <= b[i+63:i])) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (!(a[i+63:i] < b[i+63:i])) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + FOR j := 0 to 7 + i := j*64 + IF k1[j] + k[j] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] == b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] <= b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] < b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (a[i+31:i] != b[i+31:i]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (!(a[i+31:i] <= b[i+31:i])) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := (!(a[i+31:i] < b[i+31:i])) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k". + FOR j := 0 to 15 + i := j*32 + k[j] := ((a[i+31:i] != NaN) AND (b[i+31:i] != NaN)) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k". + FOR j := 0 to 15 + i := j*32 + k[j] := ((a[i+31:i] == NaN) OR (b[i+31:i] == NaN)) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (a[i+31:i] == b[i+31:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (a[i+31:i] <= b[i+31:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (a[i+31:i] < b[i+31:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (a[i+31:i] != b[i+31:i]) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (!(a[i+31:i] <= b[i+31:i])) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := (!(a[i+31:i] < b[i+31:i])) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] != NaN) AND (b[i+31:i] != NaN)) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ((a[i+31:i] == NaN) OR (b[i+31:i] == NaN)) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
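A short C sketch of the integer `imm8` form, assuming the usual name `_mm512_cmp_epi32_mask` and the `_MM_CMPINT_*` predicates listed above.

```c
#include <immintrin.h>

/* Signed 32-bit less-than via the general predicate form;
   _MM_CMPINT_LT is predicate 1 in the table above. */
static __mmask16 lt_epi32(__m512i a, __m512i b) {
    return _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
}
```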
+ + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
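The `k1` zeromask makes these compares composable, since the result can only keep bits that were already set in `k1`. A hedged sketch, assuming the conventional names `_mm512_cmpeq_epi32_mask` and `_mm512_mask_cmpeq_epi32_mask`:

```c
#include <immintrin.h>

/* Bits set where a[j] == b[j] AND c[j] == d[j]: the first compare's
   result feeds the second compare as its zeromask k1. */
static __mmask16 both_equal(__m512i a, __m512i b, __m512i c, __m512i d) {
    __mmask16 k1 = _mm512_cmpeq_epi32_mask(a, b);
    return _mm512_mask_cmpeq_epi32_mask(k1, c, d);
}
```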
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+
+
+ AVX512F
+
immintrin.h
+ Compare +
+ + + + + + Compare packed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". + +FOR j := 0 to 15 + i := j*32 + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[2:0]) OF +0: OP := _MM_CMPINT_EQ +1: OP := _MM_CMPINT_LT +2: OP := _MM_CMPINT_LE +3: OP := _MM_CMPINT_FALSE +4: OP := _MM_CMPINT_NE +5: OP := _MM_CMPINT_NLT +6: OP := _MM_CMPINT_NLE +7: OP := _MM_CMPINT_TRUE +ESAC +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+
+
+ AVX512F
+
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+
+
+ AVX512F
+
immintrin.h
+ Compare +
+ + + + + + Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k1[j] + k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Compare +
+ + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
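A hedged C sketch of the gather above, assuming the usual name `_mm512_i32gather_ps`. Note that the 512-bit gathers conventionally take the index vector first and the base address second, unlike the AVX2 gathers, and `scale` must be a compile-time 1, 2, 4 or 8.

```c
#include <immintrin.h>
#include <stdint.h>

/* Gather table[idx[j]] for 16 indices; scale 4 = sizeof(float). */
static __m512 gather16(const float *table, const int32_t idx[16]) {
    __m512i vindex = _mm512_loadu_si512(idx);
    return _mm512_i32gather_ps(vindex, table, 4);
}
```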
+ + + + + + + + Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
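A small C sketch of the aligned and merge-masked loads above, assuming the conventional names `_mm512_load_pd` and `_mm512_mask_load_pd`. For data that cannot satisfy the 64-byte requirement, the header also provides unaligned `loadu` forms.

```c
#include <immintrin.h>
#include <stdalign.h>

alignas(64) static double buf[8];  /* meets the 64-byte alignment requirement */

/* Inactive lanes keep src's value instead of reading memory. */
static __m512d load_merge(__m512d src, __mmask8 k) {
    return _mm512_mask_load_pd(src, k, buf);
}
```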
+ + + + Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 16 packed 32-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits of integer data from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 8 packed 64-bit integers) from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+31:i] := MEM[addr+31:addr] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Loads 8 64-bit integer elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" and stores them in "dst". + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + + + Loads 8 64-bit integer elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Loads 8 double-precision (64-bit) floating-point elements stored at memory locations starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale", and stores them in "dst". +
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512F
+
immintrin.h
+ Load +
+ + + + + + + + Loads 8 double-precision (64-bit) floating-point elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + dst[i+63:i] := MEM[addr+63:addr] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Load +
+ + + + + + Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
+ + + + + + Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Move +
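These masked moves act as a per-element select between `src` and `a`, the register-to-register analogue of the blend entries earlier in this file. A hedged sketch, assuming the usual name `_mm512_mask_mov_epi32`:

```c
#include <immintrin.h>

/* dst[j] = (k bit j set) ? a[j] : src[j] */
static __m512i select_epi32(__m512i src, __mmask16 k, __m512i a) {
    return _mm512_mask_mov_epi32(src, k, a);
}
```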
+ + + + + + Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
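A common use of the masked store above is tail handling, writing only the first `n` lanes; a hedged C sketch assuming the conventional name `_mm512_mask_store_pd`.

```c
#include <immintrin.h>
#include <stdalign.h>

alignas(64) static double out[8];

/* Write only the low n_valid lanes of a (0 <= n_valid <= 8);
   bytes belonging to inactive lanes are left untouched. */
static void store_tail(__m512d a, int n_valid) {
    __mmask8 k = (__mmask8)((1u << n_valid) - 1u);
    _mm512_mask_store_pd(out, k, a);
}
```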
+ + + + + Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed 32-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 15 + i := j*32 + IF k[j] + MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 16 packed 32-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits of integer data from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + Store packed 64-bit integers from "a" into memory using writemask "k". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +FOR j := 0 to 7 + i := j*64 + IF k[j] + MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 8 packed 64-bit integers) from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
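A hedged C sketch of the masked scatter above, assuming the usual name `_mm512_mask_i32scatter_epi32`. Elements are written from the lowest lane upward, so when active indices collide the highest colliding lane's value is the one that survives.

```c
#include <immintrin.h>
#include <stdint.h>

/* Store a[j] to table[idx[j]] for active lanes only;
   scale 4 = sizeof(int32_t). */
static void scatter_masked(int32_t *table, __mmask16 k,
                           __m512i vindex, __m512i a) {
    _mm512_mask_i32scatter_epi32(table, k, vindex, a, 4);
}
```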
+ + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. + +FOR j := 0 to 15 + i := j*32 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+31:addr] := a[i+31:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + Stores 8 packed double-precision (64-bit) floating-point elements in "a" to memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". +
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+
+
+ AVX512F
+
immintrin.h
+ Store +
+ + + + + + + + Stores 8 packed double-precision (64-bit) floating-point elements in "a" to memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". Only those elements whose corresponding mask bit is set in writemask "k" are written to memory. + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] AND b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 512 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[511:0] := (a[511:0] AND b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 512 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". + +dst[511:0] := ((NOT a[511:0]) AND b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
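The NOT-AND form above complements its first operand, which is easy to get backwards; a short C sketch assuming the conventional name `_mm512_andnot_si512`.

```c
#include <immintrin.h>

/* Clear in x every bit that is set in mask: (NOT mask) AND x.
   The complemented vector is the FIRST argument. */
static __m512i clear_bits(__m512i x, __m512i mask) {
    return _mm512_andnot_si512(mask, x);
}
```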
+ + + + + + + Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in "a" and then AND with "b", and store the results in "dst". + +dst[511:0] := ((NOT a[511:0]) AND b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in "a" and "b", and store the results in "dst". + +dst[511:0] := (a[511:0] AND b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] AND b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] OR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of 512 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[511:0] := (a[511:0] OR b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] OR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the result in "dst". +
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512F
+
immintrin.h
+ Logical +
+ + + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. +
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+
+
+ AVX512F
+
immintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. + +FOR j := 0 to 15 + i := j*32 + k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512F +
immintrin.h
+ Logical +
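The test-mask entries above combine an AND with a non-zero test, which is handy for per-element flag checks; a hedged sketch assuming the usual name `_mm512_test_epi32_mask`.

```c
#include <immintrin.h>
#include <stdint.h>

/* Mask bit j set when element j of a has any of the given flag bits set. */
static __mmask16 flag_set(__m512i a, int32_t flag) {
    return _mm512_test_epi32_mask(a, _mm512_set1_epi32(flag));
}
```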
+ + + + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of 512 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[511:0] := (a[511:0] XOR b[511:0]) +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Reduce the packed 32-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a". +
+DEFINE REDUCE_AND(src, len) {
+	IF len == 2
+		RETURN src[31:0] AND src[63:32]
+	FI
+	len := len / 2
+	FOR j:= 0 to (len-1)
+		i := j*32
+		src[i+31:i] := src[i+31:i] AND src[i+32*len+31:i+32*len]
+	ENDFOR
+	RETURN REDUCE_AND(src[32*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp[i+31:i] := a[i+31:i]
+	ELSE
+		tmp[i+31:i] := 0xFFFFFFFF
+	FI
+ENDFOR
+dst[31:0] := REDUCE_AND(tmp, 16)
+
+ AVX512F
+
immintrin.h
+ Logical +
+ + + + + Reduce the packed 64-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a". +
+DEFINE REDUCE_AND(src, len) {
+	IF len == 2
+		RETURN src[63:0] AND src[127:64]
+	FI
+	len := len / 2
+	FOR j:= 0 to (len-1)
+		i := j*64
+		src[i+63:i] := src[i+63:i] AND src[i+64*len+63:i+64*len]
+	ENDFOR
+	RETURN REDUCE_AND(src[64*len-1:0], len)
+}
+tmp := a
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[i+63:i] := a[i+63:i]
+	ELSE
+		tmp[i+63:i] := 0xFFFFFFFFFFFFFFFF
+	FI
+ENDFOR
+dst[63:0] := REDUCE_AND(tmp, 8)
+
+ AVX512F
+
immintrin.h
+ Logical +
+ + + + + Reduce the packed 32-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[31:0] OR src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] OR src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_OR(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0 + FI +ENDFOR +dst[31:0] := REDUCE_OR(tmp, 16) + + AVX512F +
immintrin.h
+ Logical +
+ + + + + Reduce the packed 64-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[63:0] OR src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] OR src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_OR(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0 + FI +ENDFOR +dst[63:0] := REDUCE_OR(tmp, 8) + + AVX512F +
immintrin.h
+ Logical +
+ + + + Reduce the packed 32-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[31:0] AND src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] AND src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_AND(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_AND(a, 16) + + AVX512F +
immintrin.h
+ Logical +
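A brief C sketch of the horizontal reduction above, assuming the conventional name `_mm512_reduce_and_epi32`. These are sequence intrinsics: compilers expand them into a log2-depth shuffle-and-AND tree like the REDUCE_AND pseudocode. In the masked variants earlier, inactive lanes are first filled with the identity element (all-ones for AND, zero for OR) so they cannot affect the result.

```c
#include <immintrin.h>
#include <stdint.h>

/* Horizontal AND of all 16 signed 32-bit elements. */
static int32_t all_and(__m512i a) {
    return _mm512_reduce_and_epi32(a);
}
```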
+ + + + Reduce the packed 64-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a". + +DEFINE REDUCE_AND(src, len) { + IF len == 2 + RETURN src[63:0] AND src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] AND src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_AND(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_AND(a, 8) + + AVX512F +
immintrin.h
+ Logical +
+ + + + Reduce the packed 32-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[31:0] OR src[63:32] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := src[i+31:i] OR src[i+32*len+31:i+32*len] + ENDFOR + RETURN REDUCE_OR(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_OR(a, 16) + + AVX512F +
immintrin.h
+ Logical +
+ + + + Reduce the packed 64-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a". + +DEFINE REDUCE_OR(src, len) { + IF len == 2 + RETURN src[63:0] OR src[127:64] + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := src[i+63:i] OR src[i+64*len+63:i+64*len] + ENDFOR + RETURN REDUCE_OR(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_OR(a, 8) + + AVX512F +
immintrin.h
+ Logical +
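The unmasked variants reduce every lane. A short sketch, assuming the _mm512_reduce_or_epi32 name from current immintrin.h:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    int32_t buf[16];
    for (int i = 0; i < 16; i++) buf[i] = 1 << i;   /* one distinct bit per lane */
    __m512i a = _mm512_loadu_si512(buf);
    printf("0x%08X\n", _mm512_reduce_or_epi32(a));  /* 0x0000FFFF */
    return 0;
}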
+ + + + + + + Performs element-by-element bitwise AND between packed 32-bit integer elements of "v2" and "v3", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := v2[i+31:i] & v3[i+31:i] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Logical +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Special Math Functions +
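A sketch of the per-lane max with a writemask, assuming the entry corresponds to _mm512_mask_max_epi32 (AVX-512F). Lanes whose mask bit is clear keep the value from "src" rather than taking the maximum:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    __m512i src = _mm512_set1_epi32(-1);
    __m512i a   = _mm512_set1_epi32(10);
    __m512i b   = _mm512_set1_epi32(20);
    __m512i r   = _mm512_mask_max_epi32(src, 0xAAAA, a, b); /* odd lanes active */
    int32_t out[16];
    _mm512_storeu_si512(out, r);
    for (int i = 0; i < 16; i++) printf("%d ", out[i]);     /* -1 20 -1 20 ... */
    printf("\n");
    return 0;
}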
+ + + + + Reduce the packed signed 32-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := Int32(-0x80000000) + FI +ENDFOR +dst[31:0] := REDUCE_MAX(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 64-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := Int64(-0x8000000000000000) + FI +ENDFOR +dst[63:0] := REDUCE_MAX(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 32-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0 + FI +ENDFOR +dst[31:0] := REDUCE_MAX(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 64-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0 + FI +ENDFOR +dst[63:0] := REDUCE_MAX(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := Cast_FP64(0xFFEFFFFFFFFFFFFF) + FI +ENDFOR +dst[63:0] := REDUCE_MAX(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := Cast_FP32(0xFF7FFFFF) + FI +ENDFOR +dst[31:0] := REDUCE_MAX(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 32-bit integers in "a" by maximum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := Int32(0x7FFFFFFF) + FI +ENDFOR +dst[31:0] := REDUCE_MIN(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed signed 64-bit integers in "a" by maximum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := Int64(0x7FFFFFFFFFFFFFFF) + FI +ENDFOR +dst[63:0] := REDUCE_MIN(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 32-bit integers in "a" by maximum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := 0xFFFFFFFF + FI +ENDFOR +dst[31:0] := REDUCE_MIN(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed unsigned 64-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := 0xFFFFFFFFFFFFFFFF + FI +ENDFOR +dst[63:0] := REDUCE_MIN(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by maximum using mask "k". Returns the minimum of all active elements in "a". [min_float_note] + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +tmp := a +FOR j := 0 to 8 + i := j*64 + IF k[j] + tmp[i+63:i] := a[i+63:i] + ELSE + tmp[i+63:i] := Cast_FP64(0x7FEFFFFFFFFFFFFF) + FI +ENDFOR +dst[63:0] := REDUCE_MIN(tmp, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by maximum using mask "k". Returns the minimum of all active elements in "a". [min_float_note] + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +tmp := a +FOR j := 0 to 16 + i := j*32 + IF k[j] + tmp[i+31:i] := a[i+31:i] + ELSE + tmp[i+31:i] := Cast_FP32(0x7F7FFFFF) + FI +ENDFOR +dst[31:0] := REDUCE_MIN(tmp, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
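As the pseudocode above shows, the masked min/max reductions substitute the operation's identity element for inactive lanes, so an all-zero mask simply returns that identity. A small sketch, assuming the _mm512_mask_reduce_min_epu32 name from current immintrin.h:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(42);
    unsigned lo = _mm512_mask_reduce_min_epu32(0x0001, a); /* 42: one active lane */
    unsigned id = _mm512_mask_reduce_min_epu32(0x0000, a); /* identity 0xFFFFFFFF */
    printf("%u 0x%08X\n", lo, id);
    return 0;
}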
+ + + + Reduce the packed signed 32-bit integers in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MAX(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 64-bit integers in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MAX(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 32-bit integers in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MAX(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 64-bit integers in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MAX(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MAX(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MAX(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". + +DEFINE REDUCE_MAX(src, len) { + IF len == 2 + RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MAX(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MAX(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 32-bit integers in "a" by minimum. Returns the minimum of all elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MIN(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed signed 64-bit integers in "a" by minimum. Returns the minimum of all elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MIN(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 32-bit integers in "a" by minimum. Returns the minimum of all elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MIN(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed unsigned 64-bit integers in "a" by minimum. Returns the minimum of all elements in "a". + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MIN(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed double-precision (64-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". [min_float_note] + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*64 + src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) + ENDFOR + RETURN REDUCE_MIN(src[64*len-1:0], len) +} +dst[63:0] := REDUCE_MIN(a, 8) + + AVX512F +
immintrin.h
+ Special Math Functions +
+ + + + Reduce the packed single-precision (32-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". [min_float_note] + +DEFINE REDUCE_MIN(src, len) { + IF len == 2 + RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) + FI + len := len / 2 + FOR j:= 0 to (len-1) + i := j*32 + src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) + ENDFOR + RETURN REDUCE_MIN(src[32*len-1:0], len) +} +dst[31:0] := REDUCE_MIN(a, 16) + + AVX512F +
immintrin.h
+ Special Math Functions +
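A sketch of the unmasked floating-point reduction, assuming the _mm512_reduce_max_ps name. Note the pseudocode reduces pairwise in tree order, which can matter for NaN propagation but not for a plain max over ordinary values:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    float buf[16];
    for (int i = 0; i < 16; i++) buf[i] = (float)(i * i);
    __m512 a = _mm512_loadu_ps(buf);
    printf("%f\n", _mm512_reduce_max_ps(a));  /* 225.0 */
    return 0;
}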
+ + + + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 15 + i := j*32 + IF count[i+31:i] < 32 + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Shift +
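A sketch contrasting the immediate and per-lane-variable 32-bit shifts above, assuming the _mm512_slli_epi32, _mm512_srai_epi32 and _mm512_srlv_epi32 names (AVX-512F). Counts of 32 or more yield 0, except for the arithmetic right shift, which yields copies of the sign bit:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    __m512i x  = _mm512_set1_epi32(-8);                        /* 0xFFFFFFF8 */
    __m512i sl = _mm512_slli_epi32(x, 1);                      /* -16 per lane */
    __m512i sa = _mm512_srai_epi32(x, 1);                      /* -4: sign bits shift in */
    __m512i sv = _mm512_srlv_epi32(x, _mm512_set1_epi32(1));   /* 0x7FFFFFFC: zeros shift in */
    int32_t a[16], b[16];
    uint32_t c[16];
    _mm512_storeu_si512(a, sl);
    _mm512_storeu_si512(b, sa);
    _mm512_storeu_si512(c, sv);
    printf("%d %d 0x%08X\n", a[0], b[0], c[0]);
    return 0;
}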
+ + + + Cast vector of type __m512d to type __m512. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512d to type __m512i. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512 to type __m512d. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512 to type __m512i. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512i to type __m512d. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
+ + + + Cast vector of type __m512i to type __m512. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512F +
immintrin.h
+ Cast +
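The casts above only reinterpret the 512 bits; no conversion instruction is emitted. A sketch, assuming the _mm512_castps_si512 name:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    __m512  f = _mm512_set1_ps(1.0f);
    __m512i i = _mm512_castps_si512(f);  /* same bits: each lane is 0x3F800000 */
    int32_t out[16];
    _mm512_storeu_si512(out, i);
    printf("0x%08X\n", out[0]);
    return 0;
}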
+ + + + Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst". + +FOR j := 0 to 7 + i := j*32 + n := j*64 + dst[n+63:n] := Convert_FP32_To_FP64(v2[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[l+63:l] := Convert_FP32_To_FP64(v2[i+31:i]) + ELSE + dst[l+63:l] := src[l+63:l] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Performs element-by-element conversion of the lower half of packed 32-bit integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst". + +FOR j := 0 to 7 + i := j*32 + l := j*64 + dst[l+63:l] := Convert_Int32_To_FP64(v2[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Performs element-by-element conversion of the lower half of packed 32-bit integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + n := j*64 + IF k[j] + dst[n+63:n] := Convert_Int32_To_FP64(v2[i+31:i]) + ELSE + dst[n+63:n] := src[n+63:n] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst". + +FOR j := 0 to 7 + i := j*32 + n := j*64 + dst[n+63:n] := Convert_Int32_To_FP64(v2[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + l := j*64 + IF k[j] + dst[l+63:l] := Convert_Int32_To_FP64(v2[i+31:i]) + ELSE + dst[l+63:l] := src[l+63:l] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to single-precision (32-bit) floating-point elements and stores them in "dst". The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. + +FOR j := 0 to 7 + i := j*64 + k := j*32 + dst[k+31:k] := Convert_FP64_To_FP32(v2[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to single-precision (32-bit) floating-point elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. + +FOR j := 0 to 7 + i := j*64 + l := j*32 + IF k[j] + dst[l+31:l] := Convert_FP64_To_FP32(v2[i+63:i]) + ELSE + dst[l+31:l] := src[l+31:l] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512F +
immintrin.h
+ Convert +
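A sketch of the "lower half" widening conversion described above. The lo-suffixed names are assumed here to be equivalent to converting the low __m256 half, so the sketch uses the plain _mm512_cvtps_pd / _mm512_castps512_ps256 pair (both AVX-512F) for portability:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    float buf[16];
    for (int i = 0; i < 16; i++) buf[i] = 0.5f * (float)i;
    __m512  s = _mm512_loadu_ps(buf);
    __m512d d = _mm512_cvtps_pd(_mm512_castps512_ps256(s)); /* widens lanes 0..7 */
    double out[8];
    _mm512_storeu_pd(out, d);
    printf("%f %f\n", out[0], out[7]);  /* 0.0 and 3.5 */
    return 0;
}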
+ + + + + + + Stores 8 packed 64-bit integer elements located in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". + +FOR j := 0 to 7 + i := j*64 + m := j*32 + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
+ + + + + + + + Stores 8 packed 64-bit integer elements located in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using writemask "k" (elements whose corresponding mask bit is not set are not written to memory). + +FOR j := 0 to 7 + i := j*64 + m := j*32 + IF k[j] + addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 + MEM[addr+63:addr] := a[i+63:i] + FI +ENDFOR + + + AVX512F +
immintrin.h
+ Store +
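A sketch of a 64-bit scatter with 32-bit indices. It is written with the widely available _mm512_i32scatter_epi64 (which takes its 8 indices in a __m256i); the entries above describe the variant that instead reads the indices from the low half of a __m512i, which is assumed to behave identically element-for-element. "scale" must be a compile-time constant 1, 2, 4 or 8:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    int64_t table[16] = {0};
    __m256i idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    __m512i val = _mm512_set1_epi64(7);
    _mm512_i32scatter_epi64(table, idx, val, 8);  /* table[0,2,...,14] = 7 */
    printf("%lld %lld\n", (long long)table[0], (long long)table[1]); /* 7 0 */
    return 0;
}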
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512IFMA52 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) + dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512IFMA52 +
immintrin.h
+ Arithmetic +
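A sketch of the 52-bit multiply-add pair, assuming the _mm512_madd52lo_epu64 / _mm512_madd52hi_epu64 names (compile with AVX512IFMA support, e.g. -mavx512ifma). Taken together, the lo and hi halves deliver the full 104-bit product accumulated into "a", which is the building block for wide-limb big-integer multiplication:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    __m512i acc = _mm512_setzero_si512();
    __m512i b   = _mm512_set1_epi64(3);
    __m512i c   = _mm512_set1_epi64(1LL << 51);
    __m512i lo  = _mm512_madd52lo_epu64(acc, b, c); /* low 52 bits of 3<<51 */
    __m512i hi  = _mm512_madd52hi_epu64(acc, b, c); /* bits 52..103 */
    int64_t l[8], h[8];
    _mm512_storeu_si512(l, lo);
    _mm512_storeu_si512(h, hi);
    printf("lo=%llx hi=%llx\n", (long long)l[0], (long long)h[0]); /* lo=8000000000000 hi=1 */
    return 0;
}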
+ + + + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := POPCNT(a[i+63:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POPCNT(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := POPCNT(a[i+31:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POPCNT(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512VPOPCNTDQ + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := POPCNT(a[i+31:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := POPCNT(a[i+31:i]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := POPCNT(a[i+63:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := POPCNT(a[i+63:i]) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512VPOPCNTDQ +
immintrin.h
+ Bit Manipulation +
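A sketch of the per-lane population count, assuming the _mm512_popcnt_epi32 name (compile with AVX512VPOPCNTDQ support, e.g. -mavx512vpopcntdq):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(0x00FF00FF);
    int32_t out[16];
    _mm512_storeu_si512(out, _mm512_popcnt_epi32(a));
    printf("%d\n", out[0]);  /* 16 bits set in every lane */
    return 0;
}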
+ + + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 15 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:512] := 0 + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 15 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert the BF16 (16-bit) floating-point element in "a" to a floating-point element, and store the result in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +dst[31:0] := Convert_BF16_To_FP32(a[15:0]) + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst". + +FOR j := 0 to 31 + IF j < 16 + t := b.fp32[j] + ELSE + t := a.fp32[j-16] + FI + dst.word[j] := Convert_FP32_To_BF16(t) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + IF j < 16 + t := b.fp32[j] + ELSE + t := a.fp32[j-16] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + IF j < 16 + t := b.fp32[j] + ELSE + t := a.fp32[j-16] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Convert +
+ + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst". + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 15 + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BF16 + AVX512F +
immintrin.h
+ Arithmetic +
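A sketch of the BF16 pair dot-product accumulate, assuming the _mm512_cvtne2ps_pbh and _mm512_dpbf16_ps names (compile with AVX512_BF16 support, e.g. -mavx512bf16). Each fp32 accumulator lane j gains a.bf16[2j]*b.bf16[2j] + a.bf16[2j+1]*b.bf16[2j+1]:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 ones = _mm512_set1_ps(1.0f);
    __m512 twos = _mm512_set1_ps(2.0f);
    __m512bh a = _mm512_cvtne2ps_pbh(ones, ones);  /* 32 BF16 values of 1.0 */
    __m512bh b = _mm512_cvtne2ps_pbh(twos, twos);  /* 32 BF16 values of 2.0 */
    __m512 acc = _mm512_dpbf16_ps(_mm512_setzero_ps(), a, b);
    float out[16];
    _mm512_storeu_ps(out, acc);
    printf("%f\n", out[0]);  /* 1*2 + 1*2 = 4.0 per lane */
    return 0;
}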
+ + + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 3 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:128] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 3 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 3 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 7 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:256] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 7 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. + +FOR j := 0 to 7 + i := j*32 + m := j*16 + IF k[j] + dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
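
The make_fp32 / Convert_BF16_To_FP32 helper used throughout this section amounts to a 16-bit left shift, so the BF16-to-FP32 direction is exact. A scalar C model of that helper:

    #include <stdint.h>
    #include <string.h>

    /* Scalar Convert_BF16_To_FP32: place the 16 BF16 bits in the top half of
     * an FP32 word; the low 16 mantissa bits are zero. Exact, raises no
     * exceptions, and leaves sNaN payloads untouched, as the entries state. */
    static inline float bf16_to_fp32(uint16_t x)
    {
        uint32_t bits = (uint32_t)x << 16;
        float f;
        memcpy(&f, &bits, sizeof f);   /* bit-cast without aliasing UB */
        return f;
    }
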
+ + + + Convert the single-precision (32-bit) floating-point element in "a" to a BF16 (16-bit) floating-point element, and store the result in "dst". + +dst[15:0] := Convert_FP32_To_BF16(a[31:0]) + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
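
Convert_FP32_To_BF16 is used but never defined in these operations. A common scalar model, under the assumption of round-to-nearest-even (the rounding the "ne" conversions advertise); the hardware's exact NaN and denormal behaviour may differ, so treat this purely as a sketch:

    #include <stdint.h>
    #include <string.h>

    static inline uint16_t fp32_to_bf16(float f)
    {
        uint32_t bits;
        memcpy(&bits, &f, sizeof bits);
        if ((bits & 0x7FFFFFFFu) > 0x7F800000u)        /* NaN input */
            return (uint16_t)((bits >> 16) | 0x0040);  /* return a quiet NaN */
        /* round to nearest, ties to even, then keep the high 16 bits */
        uint32_t bias = 0x7FFFu + ((bits >> 16) & 1u);
        return (uint16_t)((bits + bias) >> 16);
    }
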
+ + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst". + +FOR j := 0 to 7 + IF j < 4 + t := b.fp32[j] + ELSE + t := a.fp32[j-4] + FI + dst.word[j] := Convert_FP32_To_BF16(t) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF j < 4 + t := b.fp32[j] + ELSE + t := a.fp32[j-4] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF j < 4 + t := b.fp32[j] + ELSE + t := a.fp32[j-4] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst". + +FOR j := 0 to 15 + IF j < 8 + t := b.fp32[j] + ELSE + t := a.fp32[j-8] + FI + dst.word[j] := Convert_FP32_To_BF16(t) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF j < 8 + t := b.fp32[j] + ELSE + t := a.fp32[j-8] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF j < 8 + t := b.fp32[j] + ELSE + t := a.fp32[j-8] + FI + dst.word[j] := Convert_FP32_To_BF16(t) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Convert +
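
Putting the two conversion directions together, a round-trip sketch for the 256-bit forms, assuming the conventional intrinsic names _mm256_cvtneps_pbh and _mm256_cvtpbh_ps:

    #include <immintrin.h>

    /* Narrow 8 FP32 lanes to BF16 (round to nearest even) and widen them
     * back; the result is v with the low 16 mantissa bits rounded away. */
    __m256 bf16_roundtrip(__m256 v)
    {
        __m128bh packed = _mm256_cvtneps_pbh(v);
        return _mm256_cvtpbh_ps(packed);
    }
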
+ + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst". + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 3 + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 3 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 3 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst". + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 7 + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 7 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE make_fp32(x[15:0]) { + y.fp32 := 0.0 + y[31:16] := x[15:0] + RETURN y +} +dst := src +FOR j := 0 to 7 + IF k[j] + dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) + dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BF16 + AVX512VL +
immintrin.h
+ Arithmetic +
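
A plain-C reference for the 128-bit dot-product pseudocode above (4 FP32 accumulators, 8 BF16 pairs), handy for unit-testing the intrinsic. It mirrors the pseudocode exactly, including accumulating the odd element of each pair first; actual hardware may round the two adds differently:

    #include <stdint.h>
    #include <string.h>

    static float bf16f(uint16_t x)   /* Convert_BF16_To_FP32 */
    {
        uint32_t u = (uint32_t)x << 16;
        float f;
        memcpy(&f, &u, sizeof f);
        return f;
    }

    void dpbf16_ref(float dst[4], const float src[4],
                    const uint16_t a[8], const uint16_t b[8])
    {
        for (int j = 0; j < 4; j++) {
            float acc = src[j];
            acc += bf16f(a[2*j+1]) * bf16f(b[2*j+1]);
            acc += bf16f(a[2*j+0]) * bf16f(b[2*j+0]);
            dst[j] = acc;
        }
    }
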
+ + + + + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at the 8 bit positions controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR i := 0 to 3 //Qword
+	FOR j := 0 to 7 // Byte
+		IF k[i*8+j]
+			m := c.qword[i].byte[j] & 0x3F
+			dst[i*8+j] := b.qword[i].bit[m]
+		ELSE
+			dst[i*8+j] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:32] := 0
+ + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at the 8 bit positions controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst". +
+FOR i := 0 to 3 //Qword
+	FOR j := 0 to 7 // Byte
+		m := c.qword[i].byte[j] & 0x3F
+		dst[i*8+j] := b.qword[i].bit[m]
+	ENDFOR
+ENDFOR
+dst[MAX:32] := 0
+ + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at the 8 bit positions controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR i := 0 to 1 //Qword
+	FOR j := 0 to 7 // Byte
+		IF k[i*8+j]
+			m := c.qword[i].byte[j] & 0x3F
+			dst[i*8+j] := b.qword[i].bit[m]
+		ELSE
+			dst[i*8+j] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:16] := 0
+ + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at the 8 bit positions controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst". +
+FOR i := 0 to 1 //Qword
+	FOR j := 0 to 7 // Byte
+		m := c.qword[i].byte[j] & 0x3F
+		dst[i*8+j] := b.qword[i].bit[m]
+	ENDFOR
+ENDFOR
+dst[MAX:16] := 0
+ + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
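
In scalar terms, each byte of "c" indexes one bit of the matching 64-bit element of "b". A plain-C model of the 128-bit form above:

    #include <stdint.h>

    /* Scalar model of the 128-bit bit-gather: byte j of qword i of "c"
     * selects (via its low 6 bits) one bit of b.qword[i]; the 16 selected
     * bits land in the 16 low bits of the result. */
    static uint16_t bitshuffle128_ref(const uint64_t b[2], const uint8_t c[16])
    {
        uint16_t dst = 0;
        for (int i = 0; i <= 1; i++)            /* Qword */
            for (int j = 0; j <= 7; j++) {      /* Byte  */
                unsigned m = c[i*8 + j] & 0x3F;
                dst |= (uint16_t)((b[i] >> m) & 1) << (i*8 + j);
            }
        return dst;
    }
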
+ + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POPCNT(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POPCNT(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*8 + dst[i+7:i] := POPCNT(a[i+7:i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := POPCNT(a[i+7:i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_BITALG + AVX512VL +
immintrin.h
+ Bit Manipulation +
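
A scalar equivalent of the DEFINE POPCNT(a) loop used by all the population-count entries, applied per 16-bit element; useful as a reference when testing the vector forms:

    #include <stdint.h>

    static void popcnt_epi16_ref(uint16_t dst[8], const uint16_t a[8])
    {
        for (int j = 0; j < 8; j++) {
            uint16_t x = a[j], count = 0;
            while (x > 0) {        /* DO WHILE a > 0 */
                count += x & 1;    /* count += a[0]  */
                x >>= 1;           /* a >>= 1        */
            }
            dst[j] = count;
        }
    }
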
+ + + + + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at the 8 bit positions controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR i := 0 to 7 //Qword
+	FOR j := 0 to 7 // Byte
+		IF k[i*8+j]
+			m := c.qword[i].byte[j] & 0x3F
+			dst[i*8+j] := b.qword[i].bit[m]
+		ELSE
+			dst[i*8+j] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:64] := 0
+ + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at the 8 bit positions controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst". +
+FOR i := 0 to 7 //Qword
+	FOR j := 0 to 7 // Byte
+		m := c.qword[i].byte[j] & 0x3F
+		dst[i*8+j] := b.qword[i].bit[m]
+	ENDFOR
+ENDFOR
+dst[MAX:64] := 0
+ + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
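
A usage sketch for the 512-bit form, assuming the conventional AVX512_BITALG intrinsic name _mm512_bitshuffle_epi64_mask; the 64 gathered bits come back as a mask, one bit per byte position of "c":

    #include <immintrin.h>

    /* For each byte n of c, pick bit (c.byte[n] & 0x3F) out of the
     * corresponding qword of b; results accumulate in a __mmask64. */
    __mmask64 gather_bits(__m512i b, __m512i c)
    {
        return _mm512_bitshuffle_epi64_mask(b, c);
    }
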
+ + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POPCNT(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POPCNT(a[i+15:i]) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst". + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 63 + i := j*8 + dst[i+7:i] := POPCNT(a[i+7:i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
+ + + + + Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE POPCNT(a) { + count := 0 + DO WHILE a > 0 + count += a[0] + a >>= 1 + OD + RETURN count +} +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := POPCNT(a[i+7:i]) + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_BITALG +
immintrin.h
+ Bit Manipulation +
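
A usage sketch for the 512-bit population counts, assuming the conventional intrinsic names _mm512_loadu_si512 and _mm512_popcnt_epi8:

    #include <immintrin.h>

    /* Per-byte population count over one 64-byte block. */
    __m512i bytewise_popcount(const void *p)
    {
        return _mm512_popcnt_epi8(_mm512_loadu_si512(p));
    }
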
+ + + + + Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ACOS(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ACOSH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ASIN(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ASINH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ATAN2(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ATAN(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ATANH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := CubeRoot(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := CDFNormal(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := InverseCDFNormal(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := COSD(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := COSH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ERF(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := 1.0 - ERF(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := InverseERFC(a[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+
immintrin.h
AVX512_FP16
+ + + Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := InverseERF(a[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
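
The POW(FP16(e), a[i+15:i]) - 1.0 model above describes the semantics, not the implementation: a dedicated exp-minus-one entry exists because subtracting 1 after exponentiating cancels catastrophically for small inputs. A scalar illustration with the standard C library:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        float x = 1e-6f;
        /* expf(x) rounds to a value very close to 1, so the subtraction
         * wipes out most of the significant digits... */
        printf("expf(x) - 1 = %.9g\n", expf(x) - 1.0f);
        /* ...while expm1f computes the difference directly and accurately. */
        printf("expm1f(x)   = %.9g\n", expm1f(x));
        return 0;
    }
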
+ + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + + + Elementary Math Functions
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := SQRT(POW(a[i+15:i], 2.0) + POW(b[i+15:i], 2.0))
+ENDFOR
+dst[MAX:256] := 0
+
immintrin.h
AVX512_FP16
+ + + Compute the inverse cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := InvCubeRoot(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := InvSQRT(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := LOG(1.0 + a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + + Elementary Math FunctionsFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ConvertExpFP16(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of packed half-precision (16-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := POW(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) + MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +cos_res[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SIND(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SINH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := CEIL(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := FLOOR(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ROUND(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_ps". + + Elementary Math Functions +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := SQRT(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := TAN(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 15 + i := j*16 + dst[i+15:i] := TAND(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := TANH(a[i+15:i]) +ENDFOR +dst[MAX:256] := 0 +
immintrin.h
AVX512_FP16
+ + + Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst". + + Special Math FunctionsFOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := TRUNCATE(a[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+
immintrin.h
AVX512_FP16
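
Unlike the native AVX512_FP16 arithmetic instructions, the Trigonometry, Elementary Math Functions and Probability/Statistics entries above are SVML-style library sequences rather than single instructions. A usage sketch, assuming a compiler that provides the SVML _mm256_*_ph entry points (e.g. _mm256_sin_ph and _mm256_cos_ph, both assumptions here); _mm256_add_ph is a native AVX512_FP16 intrinsic:

    #include <immintrin.h>

    /* sin(x) + cos(x) over 16 half-precision lanes. */
    __m256h sin_plus_cos(__m256h x)
    {
        __m256h s = _mm256_sin_ph(x);
        __m256h c = _mm256_cos_ph(x);
        return _mm256_add_ph(s, c);
    }
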
+ + + Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ACOS(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ACOSH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ASIN(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ASINH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ATAN2(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ATAN(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. + + Trigonometry
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := ATANH(a[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+
immintrin.h
AVX512_FP16
+ + + Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := CubeRoot(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := CDFNormal(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := InverseCDFNormal(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := CEIL(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + + TrigonometryFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := COSD(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := COSH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ERF(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := 1.0 - ERF(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := InverseERFC(a[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+
immintrin.h
AVX512_FP16
+ + + Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Probability/StatisticsFOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := InverseERF(a[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := FLOOR(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + + + Elementary Math Functions
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := SQRT(POW(a[i+15:i], 2.0) + POW(b[i+15:i], 2.0))
+ENDFOR
+dst[MAX:512] := 0
+
immintrin.h
AVX512_FP16
+ + + Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := InvSQRT(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := LOG(1.0 + a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ConvertExpFP16(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
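
Every masked entry that follows repeats the same per-lane pattern, differing only in the function applied. A scalar model of the writemask semantics (a zeromask variant would write 0.0 in the ELSE branch instead); acosf stands in here for the half-precision ACOS operation:

    #include <math.h>
    #include <stdint.h>

    static void mask_acos_ref(float dst[32], const float src[32],
                              uint32_t k, const float a[32])
    {
        for (int j = 0; j < 32; j++)
            dst[j] = ((k >> j) & 1) ? acosf(a[j])   /* IF k[j]          */
                                    : src[j];       /* ELSE copy source */
    }
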
+ + + Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ACOS(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ACOSH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ASIN(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ASINH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ATAN(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ATANH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := CubeRoot(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := CDFNormal(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := InverseCDFNormal(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := CEIL(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := COS(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + TrigonometryFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := COSD(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := COSH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ERF(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := 1.0 - ERF(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := InverseERFC(a[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
immintrin.h
AVX512_FP16
+ + + Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Probability/StatisticsFOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := InverseERF(a[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POW(FP16(e), a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := FLOOR(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := InvSQRT(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := LOG(1.0 + a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := LOG(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + + + + Elementary Math FunctionsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ConvertExpFP16(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Rounds each packed half-precision (16-bit) floating-point element in "a" to the nearest integer value and stores the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Special Math FunctionsFOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := NearbyInt(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Computes the reciprocal of packed half-precision (16-bit) floating-point elements in "a", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := (1.0 / a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
Rounds the packed half-precision (16-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

Special Math Functions
FOR j := 0 to 31
	i := j*16
	IF k[j]
		dst[i+15:i] := RoundToNearestEven(a[i+15:i])
	ELSE
		dst[i+15:i] := src[i+15:i]
	FI
ENDFOR
dst[MAX:512] := 0
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SIN(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". Elements are written to their respective locations using writemask "k" (elements are copied from "sin_src" or "cos_src" when the corresponding mask bit is not set).

Trigonometry
FOR j := 0 to 31
	i := j*16
	IF k[j]
		dst[i+15:i] := SIN(a[i+15:i])
		MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i])
	ELSE
		dst[i+15:i] := sin_src[i+15:i]
		MEM[mem_addr+i+15:mem_addr+i] := cos_src[i+15:i]
	FI
ENDFOR
dst[MAX:512] := 0
cos_res[MAX:512] := 0
immintrin.h
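A sketch of the sine/cosine pairing above, shown with the unmasked form for brevity (the masked form adds sin_src, cos_src and k exactly as in the pseudocode). The name _mm512_sincos_ph and the pointer-out parameter follow the guide's sincos pattern and are assumptions here.

#include <immintrin.h>

void sincos32(const _Float16 *x, _Float16 *s, _Float16 *c) {
    __m512h cos_v;
    /* The sines come back in the return value; the cosines are written
     * through the first argument. */
    __m512h sin_v = _mm512_sincos_ph(&cos_v, _mm512_loadu_ph(x));
    _mm512_storeu_ph(s, sin_v);
    _mm512_storeu_ph(c, cos_v);
}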
AVX512_FP16
Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

Trigonometry
FOR j := 0 to 31
	i := j*16
	IF k[j]
		dst[i+15:i] := SIND(a[i+15:i])
	ELSE
		dst[i+15:i] := src[i+15:i]
	FI
ENDFOR
dst[MAX:512] := 0
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := SINH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ROUND(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := TAN(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

Trigonometry
FOR j := 0 to 31
	i := j*16
	IF k[j]
		dst[i+15:i] := TAND(a[i+15:i])
	ELSE
		dst[i+15:i] := src[i+15:i]
	FI
ENDFOR
dst[MAX:512] := 0
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := TANH(a[i+15:i]) + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

Special Math Functions
FOR j := 0 to 31
	i := j*16
	IF k[j]
		dst[i+15:i] := TRUNCATE(a[i+15:i])
	ELSE
		dst[i+15:i] := src[i+15:i]
	FI
ENDFOR
dst[MAX:512] := 0
immintrin.h
AVX512_FP16
Rounds each packed half-precision (16-bit) floating-point element in "a" to the nearest integer value and stores the results as packed half-precision floating-point elements in "dst".

Special Math Functions
FOR j := 0 to 31
	i := j*16
	dst[i+15:i] := NearbyInt(a[i+15:i])
ENDFOR
dst[MAX:512] := 0
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of packed half-precision (16-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := POW(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Computes the reciprocal of packed half-precision (16-bit) floating-point elements in "a", storing the results in "dst". + + Elementary Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := (1.0 / a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
Rounds the packed half-precision (16-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst".

Special Math Functions
FOR j := 0 to 31
	i := j*16
	dst[i+15:i] := RoundToNearestEven(a[i+15:i])
ENDFOR
dst[MAX:512] := 0
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) + MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +cos_res[MAX:512] := 0 +
immintrin.h
AVX512_FP16
Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".

Trigonometry
FOR j := 0 to 31
	i := j*16
	dst[i+15:i] := SIND(a[i+15:i])
ENDFOR
dst[MAX:512] := 0
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := SINH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ROUND(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := TAN(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".

Trigonometry
FOR j := 0 to 31
	i := j*16
	dst[i+15:i] := TAND(a[i+15:i])
ENDFOR
dst[MAX:512] := 0
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := TANH(a[i+15:i]) +ENDFOR +dst[MAX:512] := 0 +
immintrin.h
AVX512_FP16
Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst".

Special Math Functions
FOR j := 0 to 31
	i := j*16
	dst[i+15:i] := TRUNCATE(a[i+15:i])
ENDFOR
dst[MAX:512] := 0
immintrin.h
AVX512_FP16
Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".

Trigonometry
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := ACOS(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
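A usage sketch for the 128-bit inverse-trigonometry block that starts here, assuming SVML support and the guide's name for this entry (_mm_acos_ph); inputs must lie in [-1, 1] and the result is an angle in radians.

#include <immintrin.h>

static inline __m128h arccos8(__m128h a) {
    return _mm_acos_ph(a);   /* dst[j] = ACOS(a[j]), j = 0..7 */
}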
AVX512_FP16
+ + + Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ACOSH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ASIN(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ASINH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ATAN2(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ATAN(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ATANH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".

Elementary Math Functions
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := CubeRoot(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".

Probability/Statistics
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := CDFNormal(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".

Probability/Statistics
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := InverseCDFNormal(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
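The two normal-distribution entries above are functional inverses, so composing them is a handy sanity check. A sketch, assuming SVML support and the guide's names _mm_cdfnorm_ph / _mm_cdfnorminv_ph:

#include <immintrin.h>

/* For p in (0, 1): cdfnorm(cdfnorminv(p)) ~= p, up to fp16 error. */
static inline __m128h cdf_roundtrip(__m128h p) {
    return _mm_cdfnorm_ph(_mm_cdfnorminv_ph(p));
}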
AVX512_FP16
+ + + Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".

Trigonometry
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := COSD(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := COSH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".

Probability/Statistics
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := ERF(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".

Probability/Statistics
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := 1.0 - ERF(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".

Probability/Statistics
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := 1.0 / (1.0 - ERF(a[i+15:i]))
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".

Probability/Statistics
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := 1.0 / ERF(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst".

Elementary Math Functions
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := SQRT(POW(a[i+15:i], 2.0) + POW(b[i+15:i], 2.0))
ENDFOR
dst[MAX:128] := 0
immintrin.h
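A sketch of the hypotenuse entry above, assuming SVML support and the guide's name _mm_hypot_ph. A library hypot typically also guards against intermediate overflow, which matters in fp16: a*a already overflows once |a| exceeds about 256 (fp16 max is 65504), while the true hypotenuse may still be representable.

#include <immintrin.h>

static inline __m128h hypot8(__m128h a, __m128h b) {
    return _mm_hypot_ph(a, b);   /* dst[j] = sqrt(a[j]^2 + b[j]^2) */
}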
AVX512_FP16
Compute the inverse cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".

Elementary Math Functions
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := InvCubeRoot(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".

Elementary Math Functions
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := InvSQRT(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
+ + + Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := LOG(1.0 + a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := LOG(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.

Elementary Math Functions
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := ConvertExpFP16(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
+ + + Compute the exponential value of packed half-precision (16-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := POW(a[i+15:i], b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SIN(a[i+15:i]) + MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +cos_res[MAX:128] := 0 +
immintrin.h
AVX512_FP16
Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".

Trigonometry
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := SIND(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SINH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := CEIL(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := FLOOR(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst". + + Special Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ROUND(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_ps". + + Elementary Math Functions +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SQRT(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
+ + + Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := TAN(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".

Trigonometry
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := TAND(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
+ + + Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + + Trigonometry +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := TANH(a[i+15:i]) +ENDFOR +dst[MAX:128] := 0 +
immintrin.h
AVX512_FP16
Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst".

Special Math Functions
FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := TRUNCATE(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
immintrin.h
AVX512_FP16
Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst".

FOR j := 0 TO 7
	dst.fp16[j] := a.fp16[j] + b.fp16[j]
ENDFOR
dst[MAX:128] := 0

	AVX512_FP16
	AVX512VL
immintrin.h
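From here on the entries are plain hardware intrinsics (VADDPH and friends) rather than SVML calls; with GCC or Clang they build with -mavx512fp16 -mavx512vl. A minimal sketch for the unmasked add above, using the guide's name _mm_add_ph:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    _Float16 xa[8], xb[8], out[8];
    for (int j = 0; j < 8; j++) { xa[j] = (_Float16)j; xb[j] = (_Float16)0.5f; }

    __m128h dst = _mm_add_ph(_mm_loadu_ph(xa), _mm_loadu_ph(xb));
    _mm_storeu_ph(out, dst);

    for (int j = 0; j < 8; j++) printf("%g ", (double)out[j]);
    printf("\n");   /* 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 */
    return 0;
}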
+ Arithmetic +
+ + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 7
	IF k[j]
		dst.fp16[j] := a.fp16[j] + b.fp16[j]
	ELSE
		dst.fp16[j] := 0
	FI
ENDFOR
dst[MAX:128] := 0

	AVX512_FP16
	AVX512VL
immintrin.h
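The writemask and zeromask forms above differ only in what inactive lanes receive, which this sketch makes concrete (guide names _mm_mask_add_ph / _mm_maskz_add_ph assumed):

#include <immintrin.h>

void masked_adds(__m128h src, __m128h a, __m128h b,
                 __m128h *merged, __m128h *zeroed) {
    __mmask8 k = 0x0F;                          /* lanes 0..3 active */
    *merged = _mm_mask_add_ph(src, k, a, b);    /* lanes 4..7 = src  */
    *zeroed = _mm_maskz_add_ph(k, a, b);        /* lanes 4..7 = 0    */
}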
+ Arithmetic +
+ + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := a.fp16[j] + b.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := a.fp16[j] / b.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := a.fp16[j] / b.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".

FOR j := 0 to 7
	dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
ENDFOR
dst[MAX:128] := 0

	AVX512_FP16
	AVX512VL
immintrin.h
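A sketch of the fused multiply-add above (guide name _mm_fmadd_ph assumed). The fused form rounds once rather than twice, so it is not bit-identical to a separate multiply followed by an add; in fp16, where every rounding step is coarse, that single rounding is often the point.

#include <immintrin.h>

static inline __m128h muladd8(__m128h a, __m128h b, __m128h c) {
    return _mm_fmadd_ph(a, b, c);   /* dst[j] = a[j]*b[j] + c[j], one rounding */
}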
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 7
	IF k[j]
		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
	ELSE
		dst.fp16[j] := 0
	FI
ENDFOR
dst[MAX:128] := 0

	AVX512_FP16
	AVX512VL
immintrin.h
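With three source operands there are two natural merge targets, which is why the masked FMA entries above come in a mask form (inactive lanes keep "a"), a mask3 form (inactive lanes keep "c"), and a maskz form (inactive lanes zeroed). A sketch with the guide's assumed names:

#include <immintrin.h>

void fmadd_variants(__m128h a, __m128h b, __m128h c, __mmask8 k, __m128h r[3]) {
    r[0] = _mm_mask_fmadd_ph(a, k, b, c);    /* inactive lane = a.fp16[j] */
    r[1] = _mm_mask3_fmadd_ph(a, b, c, k);   /* inactive lane = c.fp16[j] */
    r[2] = _mm_maskz_fmadd_ph(k, a, b, c);   /* inactive lane = 0         */
}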
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".

FOR j := 0 to 7
	IF ((j & 1) == 0)
		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
	ELSE
		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
	FI
ENDFOR
dst[MAX:128] := 0

	AVX512_FP16
	AVX512VL
immintrin.h
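In the fmaddsub entry above, even lanes compute a*b - c and odd lanes a*b + c (the fmsubadd entries further down swap the two). This even/odd interleaving is the usual building block for complex arithmetic on (re, im) pairs. A sketch, assuming the guide's name _mm_fmaddsub_ph:

#include <immintrin.h>

static inline __m128h fmaddsub8(__m128h a, __m128h b, __m128h c) {
    /* dst[0] = a[0]*b[0] - c[0], dst[1] = a[1]*b[1] + c[1], ... */
    return _mm_fmaddsub_ph(a, b, c);
}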
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 15 + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + IF ((j & 1) == 0) + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + FI + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := a.fp16[j] - b.fp16[j] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := a.fp16[j] - b.fp16[j] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := a.fp16[j] - b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR i := 0 TO 7 + dst.fp16[i] := a.fp16[i] * b.fp16[i] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 TO 7 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 TO 7 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR i := 0 TO 15 + dst.fp16[i] := a.fp16[i] * b.fp16[i] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 TO 15 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 TO 15 + IF k[i] + dst.fp16[i] := a.fp16[i] * b.fp16[i] + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

FOR i := 0 to 3
	dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1])
	dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1])
ENDFOR
dst[MAX:128] := 0

	AVX512_FP16
	AVX512VL
immintrin.h
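The two identical complex-multiply entries here reflect the two names the guide publishes for the same operation (_mm_fmul_pch and _mm_mul_pch, both assumed here; the underlying instruction is VFMULCPH). Each __m128h holds four complex numbers as interleaved (re, im) fp16 pairs:

#include <immintrin.h>

/* (re0 + i*im0) * (re1 + i*im1) = (re0*re1 - im0*im1) + i*(re0*im1 + im0*re1) */
static inline __m128h cmul4(__m128h a, __m128h b) {
    return _mm_mul_pch(a, b);
}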
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
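+
+Usage sketch: the paired entries above (two spellings per operation, presumably the _mm_fmul_pch/_mm_mul_pch and _mm256_fmul_pch/_mm256_mul_pch aliases; names inferred, not stated in this data) multiply interleaved re,im FP16 pairs as complex numbers:
+
+    #include <immintrin.h>
+
+    /* Lane 0 computes (1+2i)*(1+2i) = -3+4i per the formula above. */
+    __m128h cmul_demo(void)
+    {
+        __m128h a = _mm_setr_ph((_Float16)1, (_Float16)2, (_Float16)3, (_Float16)4,
+                                (_Float16)0, (_Float16)1, (_Float16)1, (_Float16)0);
+        return _mm_fmul_pch(a, a);
+    }
+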
+ + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := src.fp16[2*i+0] + dst.fp16[2*i+1] := src.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
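+
+Usage sketch: the conjugate forms compute a * conj(b) per complex lane, so multiplying a vector by itself yields squared magnitudes in the real parts and zero imaginary parts. Assuming the _mm_fcmul_pch name (with _mm_cmul_pch as the assumed alias):
+
+    #include <immintrin.h>
+
+    /* dst.re = re^2 + im^2, dst.im = 0 in every complex lane. */
+    __m128h cabs2_demo(__m128h z)
+    {
+        return _mm_fcmul_pch(z, z);
+    }
+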
+ + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
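+
+Usage sketch: the fused complex multiply-add entries compute a*b + c per complex lane; the masked forms differ only in what masked-off lanes receive (a for the mask form, c for the mask3 form, zero for maskz), and only the low four mask bits matter for the four 128-bit complex lanes. Assuming the _mm_fmadd_pch family names:
+
+    #include <immintrin.h>
+
+    __m128h cfma_demo(__m128h a, __m128h b, __m128h c, __mmask8 k)
+    {
+        __m128h full  = _mm_fmadd_pch(a, b, c);          /* a*b + c              */
+        __m128h keepa = _mm_mask_fmadd_pch(a, k, b, c);  /* off lanes copy "a"   */
+        __m128h keepc = _mm_mask3_fmadd_pch(a, b, c, k); /* off lanes copy "c"   */
+        __m128h zero  = _mm_maskz_fmadd_pch(k, a, b, c); /* off lanes are zeroed */
+        return _mm_fmadd_pch(full, keepa, _mm_fmadd_pch(keepc, zero, c));
+    }
+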
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 3 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 7 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
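+
+Usage sketch: the conjugate variant accumulates a*conj(b) + c, which is the core step of a complex dot product or correlation. Assuming the _mm256_fcmadd_pch name:
+
+    #include <immintrin.h>
+
+    /* acc += x * conj(y), eight complex FP16 lanes at a time. */
+    __m256h corr_step(__m256h acc, __m256h x, __m256h y)
+    {
+        return _mm256_fcmadd_pch(x, y, acc);
+    }
+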
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". + +tmp := a +FOR i := 0 to 7 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+8] +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+4] +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+2] +ENDFOR +dst.fp16[0] := tmp.fp16[0] + tmp.fp16[1] + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". + +tmp := a +FOR i := 0 to 7 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+8] +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+4] +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+2] +ENDFOR +dst.fp16[0] := tmp.fp16[0] * tmp.fp16[1] + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". + +tmp := a +FOR i := 0 to 7 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8]) +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) +ENDFOR +dst.fp16[0] := (tmp.fp16[0] > tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". + +tmp := a +FOR i := 0 to 7 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8]) +ENDFOR +FOR i := 0 to 3 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) +ENDFOR +dst.fp16[0] := (tmp.fp16[0] < tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". + +tmp := a +FOR i := 0 to 3 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+4] +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+2] +ENDFOR +dst.fp16[0] := tmp.fp16[0] + tmp.fp16[1] + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". + +tmp := a +FOR i := 0 to 3 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+4] +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+2] +ENDFOR +dst.fp16[0] := tmp.fp16[0] * tmp.fp16[1] + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". + +tmp := a +FOR i := 0 to 3 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) +ENDFOR +dst.fp16[0] := (tmp.fp16[0] > tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Reduce the packed half-precision (16-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". + +tmp := a +FOR i := 0 to 3 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) +ENDFOR +FOR i := 0 to 1 + tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) +ENDFOR +dst.fp16[0] := (tmp.fp16[0] < tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
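+
+Usage sketch: the reduce entries collapse a whole vector to one scalar using the fixed pairwise trees shown above (the combination order matters for FP16 rounding, so the sum is not guaranteed to equal a left-to-right accumulation). Assuming the _mm256_reduce_*_ph names, which return a _Float16:
+
+    #include <immintrin.h>
+
+    _Float16 stats_demo(__m256h v, _Float16 *mn, _Float16 *mx)
+    {
+        *mn = _mm256_reduce_min_ph(v);   /* minimum of all 16 elements      */
+        *mx = _mm256_reduce_max_ph(v);   /* maximum of all 16 elements      */
+        return _mm256_reduce_add_ph(v);  /* pairwise sum of all 16 elements */
+    }
+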
+ + + + Finds the absolute value of each packed half-precision (16-bit) floating-point element in "v2", storing the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := ABS(v2.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Finds the absolute value of each packed half-precision (16-bit) floating-point element in "v2", storing the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := ABS(v2.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
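+
+Usage sketch: FP16 absolute value just clears each element's sign bit. Assuming the _mm256_abs_ph name:
+
+    #include <immintrin.h>
+
+    __m256h abs_demo(__m256h v)
+    {
+        return _mm256_abs_ph(v);   /* ABS() applied to each fp16 lane */
+    }
+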
+ + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Arithmetic +
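+
+Usage sketch: conjugation is a pure sign flip of every imaginary part, implemented above as an XOR of bit 31 of each 32-bit re,im pair; the mask/maskz forms select lanes as usual. Assuming the _mm256_conj_pch name:
+
+    #include <immintrin.h>
+
+    __m256h conj_demo(__m256h z)
+    {
+        return _mm256_conj_pch(z);   /* re + i*im  ->  re - i*im, per lane */
+    }
+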
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0 +ENDFOR +k[MAX:8] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 7 + IF k1[j] + k[j] := ( a.fp16[j] OP b.fp16[j] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0 +ENDFOR +k[MAX:16] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 15 + IF k1[j] + k[j] := ( a.fp16[j] OP b.fp16[j] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Compare +
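+
+Usage sketch: the compare entries return a bitmask with one bit per fp16 lane, with the predicate chosen by the compile-time imm8 from the table above (the standard _CMP_* macros). Assuming the _mm256_cmp_ph_mask names:
+
+    #include <immintrin.h>
+
+    __mmask16 lt_demo(__m256h a, __m256h b, __mmask16 k1)
+    {
+        __mmask16 all = _mm256_cmp_ph_mask(a, b, _CMP_LT_OS);
+        __mmask16 sel = _mm256_mask_cmp_ph_mask(k1, a, b, _CMP_LT_OS);
+        return all ^ sel;   /* lanes that compared true but were outside k1 */
+    }
+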
+ + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
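+
+Usage sketch: the word-to-FP16 conversions come in signed and unsigned flavours; the only semantic difference is how the top bit is read, e.g. 0x8000 becomes -32768.0 signed but 32768.0 unsigned (inputs above 2048 in magnitude may round, since FP16 has an 11-bit significand). Assuming the _mm256_cvtepi16_ph/_mm256_cvtepu16_ph names:
+
+    #include <immintrin.h>
+
+    void cvt16_demo(__m256i words, __m256h *s, __m256h *u)
+    {
+        *s = _mm256_cvtepi16_ph(words);  /* signed:   0x8000 -> -32768.0 */
+        *u = _mm256_cvtepu16_ph(words);  /* unsigned: 0x8000 ->  32768.0 */
+    }
+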
+ + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
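+
+Usage sketch: the dword conversions narrow, so the 128-bit-source forms fill only the low four fp16 lanes of the result (upper 64 bits zeroed) while the 256-bit-source forms fill a whole __m128h. Assuming the _mm_cvtepi32_ph/_mm256_cvtepu32_ph names:
+
+    #include <immintrin.h>
+
+    void cvt32_demo(__m128i d4, __m256i d8, __m128h *lo, __m128h *full)
+    {
+        *lo   = _mm_cvtepi32_ph(d4);     /* 4 results, upper 64 bits zeroed */
+        *full = _mm256_cvtepu32_ph(d8);  /* 8 results, unsigned source      */
+    }
+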
+ + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
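+
+Usage sketch: the qword conversions narrow further still: two results (upper 96 bits zeroed) from a 128-bit source, four results (upper 64 bits zeroed) from a 256-bit source. Assuming the _mm_cvtepi64_ph/_mm256_cvtepu64_ph names:
+
+    #include <immintrin.h>
+
+    void cvt64_demo(__m128i q2, __m256i q4, __m128h *two, __m128h *four)
+    {
+        *two  = _mm_cvtepi64_ph(q2);     /* 2 fp16 results in bits 31:0 */
+        *four = _mm256_cvtepu64_ph(q4);  /* 4 fp16 results in bits 63:0 */
+    }
+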
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 96 bits of "dst" are zeroed out. + +FOR j := 0 TO 1 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:32] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 TO 3 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
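The same caveat applies to the double-to-half entries: `_mm_mask_cvtpd_ph` is the assumed Intrinsics Guide name, not one preserved here. A sketch of merge-masking, where a masked-off lane keeps the value from "src" while the lanes beyond the two results are still zeroed:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128d a   = _mm_set_pd(2.5, 1.25);      /* lane 1 = 2.5, lane 0 = 1.25 */
        __m128h src = _mm_set1_ph((_Float16)9.0); /* fallback for masked-off lanes */

        /* k = 0x1: lane 0 is converted, lane 1 is copied from src,
           and dst[MAX:32] := 0 clears fp16 lanes 2..7. */
        __m128h r = _mm_mask_cvtpd_ph(src, 0x1, a);

        _Float16 buf[8];
        _mm_storeu_ph(buf, r);
        printf("%g %g %g\n", (double)buf[0], (double)buf[1], (double)buf[2]); /* 1.25 9 0 */
        return 0;
    }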
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 to 3 + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 to 3 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The upper 64 bits of "dst" are zeroed out. + +FOR j := 0 to 3 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:64] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
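Under the assumed Intrinsics Guide naming, the single-to-half entries above are `_mm_cvtxps_ph` and `_mm256_cvtxps_ph`; the extra `x` separates them from the older F16C `_mm_cvtps_ph`, which takes a rounding immediate instead of rounding per MXCSR. A sketch showing the precision loss inherent in narrowing:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        /* Eight floats narrow to eight fp16 lanes, filling a whole __m128h. */
        __m256 a  = _mm256_set1_ps(0.1f);
        __m128h r = _mm256_cvtxps_ph(a);

        _Float16 buf[8];
        _mm_storeu_ph(buf, r);
        /* 0.1 is not representable in fp16; the nearest value is 0.0999755859375. */
        printf("%.10g\n", (double)buf[0]);
        return 0;
    }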
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 3 + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 3 + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 3 + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 3 + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
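The four groups above (signed and unsigned, with and without truncation) differ only in the per-lane scalar conversion, and the dword results are wider than the fp16 sources, so only the low four lanes of a 128-bit input are consumed. Assuming the Intrinsics Guide names `_mm_cvtph_epi32` and `_mm_cvttph_epi32` (the extra `t` marks truncation), a sketch contrasting the two:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        /* 2.7 stores as ~2.699 in fp16; the two conversions then disagree: */
        __m128h a   = _mm_set1_ph((_Float16)2.7);
        __m128i rnd = _mm_cvtph_epi32(a);  /* current rounding (default nearest): 3 */
        __m128i trc = _mm_cvttph_epi32(a); /* truncate toward zero: 2 */

        int r[4], t[4];
        _mm_storeu_si128((__m128i *)r, rnd);
        _mm_storeu_si128((__m128i *)t, trc);
        printf("%d %d\n", r[0], t[0]);     /* 3 2 */
        return 0;
    }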
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 1 + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 3 + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 1 + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 3 + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 1 + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 3 + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 1 + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 1 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 3 + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 3 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
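The 64-bit groups repeat the pattern with only the low two fp16 lanes consumed. A sketch assuming the name `_mm_cvtph_epi64`, showing ties-to-even under the default rounding mode:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        /* Only fp16 lanes 0..1 feed the two qword results. */
        __m128h a = _mm_set1_ph((_Float16)-1.5);
        __m128i q = _mm_cvtph_epi64(a);    /* -1.5 ties to even: -2 */

        long long v[2];
        _mm_storeu_si128((__m128i *)v, q);
        printf("%lld %lld\n", v[0], v[1]); /* -2 -2 */
        return 0;
    }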
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 15 + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 15 + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 15 + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 15 + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
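In the 16-bit groups the lane counts match (8 lanes in 128 bits, 16 in 256), so the whole source vector is consumed and nothing is widened. A sketch assuming the names `_mm_cvtph_epi16` and `_mm_cvttph_epi16`:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128h a = _mm_set1_ph((_Float16)1.5);
        __m128i w = _mm_cvtph_epi16(a);  /* 1.5 ties to even: 2 */
        __m128i t = _mm_cvttph_epi16(a); /* truncate: 1 */

        short rw[8], rt[8];
        _mm_storeu_si128((__m128i *)rw, w);
        _mm_storeu_si128((__m128i *)rt, t);
        printf("%d %d\n", rw[0], rt[0]); /* 2 1 */
        return 0;
    }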
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := src.fp64[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := src.fp64[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := src.fp32[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := src.fp32[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Convert +
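The two widening groups above are exact: every fp16 value is representable in both fp32 and fp64, so no rounding can occur. A sketch assuming the names `_mm_cvtph_pd` and `_mm256_cvtxph_ps`:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128h h = _mm_set1_ph((_Float16)0.1); /* stored as 0.0999755859375 */
        __m256  f = _mm256_cvtxph_ps(h);        /* 8 fp16 lanes -> 8 floats */
        __m128d d = _mm_cvtph_pd(h);            /* low 2 fp16 lanes -> 2 doubles */

        float vf[8]; double vd[2];
        _mm256_storeu_ps(vf, f);
        _mm_storeu_pd(vd, d);
        printf("%.13g %.13g\n", (double)vf[0], vd[0]); /* both 0.0999755859375 */
        return 0;
    }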
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 7 + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 15 + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [max_float_note] + +dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [max_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [max_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
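One property of the max entries is visible directly in the pseudocode: the select is "(a > b ? a : b)", and a comparison involving NaN is false, so a NaN in "a" yields "b" while a NaN in "b" propagates; the operation is therefore not commutative, which is presumably what the [max_float_note] placeholder flags. A sketch assuming the name `_mm_max_ph`:

    #include <immintrin.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
        __m128h qnan = _mm_set1_ph((_Float16)NAN);
        __m128h one  = _mm_set1_ph((_Float16)1.0);

        _Float16 buf[8];
        _mm_storeu_ph(buf, _mm_max_ph(qnan, one));
        printf("%g\n", (double)buf[0]); /* 1: NaN > 1 is false, b is returned */
        _mm_storeu_ph(buf, _mm_max_ph(one, qnan));
        printf("%g\n", (double)buf[0]); /* nan: 1 > NaN is false, b is returned */
        return 0;
    }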
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 7 + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 15 + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [min_float_note] + +dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [min_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [min_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Special Math Functions +
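The scalar entries also copy fp16 lanes 1..7 of "dst" from "a" rather than computing them, like the other `sh` operations in this file. A sketch assuming the name `_mm_min_sh`:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128h a = _mm_set1_ph((_Float16)4.0);
        __m128h b = _mm_set1_ph((_Float16)3.0);
        /* Lane 0 gets min(4, 3) = 3; lanes 1..7 are copied from a. */
        __m128h r = _mm_min_sh(a, b);

        _Float16 buf[8];
        _mm_storeu_ph(buf, r);
        printf("%g %g\n", (double)buf[0], (double)buf[1]); /* 3 4 */
        return 0;
    }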
+ + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 7 + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 15 + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
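Unpacking the RoundScaleFP16 definition above: imm8[7:4] is M, the number of fraction bits to keep, imm8[3:0] selects the rounding used by ROUND, and the result is 2^-M * ROUND(2^M * src). With M = 2 and round-to-nearest, 1.2 becomes 2^-2 * ROUND(4.8) = 5/4 = 1.25. A sketch assuming the name `_mm_roundscale_ph` (the immediate must be a compile-time constant):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        /* imm8 = (M << 4) | rounding; 0 in the low bits is round-to-nearest. */
        __m128h a = _mm_set1_ph((_Float16)1.2);
        __m128h r = _mm_roundscale_ph(a, (2 << 4) | 0);

        _Float16 buf[8];
        _mm_storeu_ph(buf, r);
        printf("%g\n", (double)buf[0]); /* 1.25, the nearest quarter */
        return 0;
    }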
+ + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 7 + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 15 + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
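The getexp entries compute the integer exponent, essentially floor(log2(x)) per the description, returned as an fp16 value. A sketch assuming the name `_mm_getexp_ph`:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        /* 2^3 <= 10 < 2^4, so the extracted exponent is 3.0. */
        __m128h a = _mm_set1_ph((_Float16)10.0);
        __m128h e = _mm_getexp_ph(a);

        _Float16 buf[8];
        _mm_storeu_ph(buf, e);
        printf("%g\n", (double)buf[0]); /* 3 */
        return 0;
    }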
+ + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 7 + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 7 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 7 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 15 + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 15 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 15 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 7 + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 15 + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 7 + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
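A sketch of the scaling entries above, assuming the standard Intel names _mm_scalef_ph/_mm256_scalef_ph (omitted from the stripped entries). Note that the second operand is floored before being used as the power of two:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128h a = _mm_set1_ph((_Float16)3.0f);
        __m128h b = _mm_set1_ph((_Float16)-2.75f);  /* FLOOR(-2.75) = -3  */
        __m128h r = _mm_scalef_ph(a, b);            /* 3.0 * 2^-3 = 0.375 */
        _Float16 buf[8];
        _mm_storeu_ph(buf, r);
        printf("%f\n", (float)buf[0]);              /* prints 0.375000 */
        return 0;
    }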
+ + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR i := 0 to 7 + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) +ENDFOR +k[MAX:8] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR i := 0 to 7 + IF k1[i] + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) + ELSE + k[i] := 0 + FI +ENDFOR +k[MAX:8] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR i := 0 to 15 + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) +ENDFOR +k[MAX:16] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR i := 0 to 15 + IF k1[i] + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) + ELSE + k[i] := 0 + FI +ENDFOR +k[MAX:16] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
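A sketch of the classification entries above, assuming the standard Intel name _mm_fpclass_ph_mask and the usual VFPCLASS category bits referenced by [fpclass_note] (0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +Inf, 0x10 -Inf, 0x20 denormal, 0x40 negative finite, 0x80 SNaN):

    #include <immintrin.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
        __m128h a = _mm_set_ph((_Float16)1.0f, (_Float16)2.0f, (_Float16)3.0f,
                               (_Float16)4.0f, (_Float16)5.0f, (_Float16)-0.0f,
                               (_Float16)0.0f, (_Float16)NAN);
        __mmask8 nan_lanes  = _mm_fpclass_ph_mask(a, 0x01 | 0x80);  /* any NaN */
        __mmask8 zero_lanes = _mm_fpclass_ph_mask(a, 0x02 | 0x04);  /* +/- 0   */
        printf("NaN: 0x%02x  zero: 0x%02x\n",
               (unsigned)nan_lanes, (unsigned)zero_lanes);          /* 0x01, 0x06 */
        return 0;
    }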
+ + + + + + Shuffle half-precision (16-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + off := idx[i+2:i] + dst.fp16[j] := idx[i+3] ? b.fp16[off] : a.fp16[off] +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Shuffle half-precision (16-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + off := idx[i+3:i] + dst.fp16[j] := idx[i+4] ? b.fp16[off] : a.fp16[off] +ENDFOR +dst[MAX:256] := 0 + + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
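A sketch of the two-source shuffle entries above, assuming the standard Intel name _mm_permutex2var_ph. For the 128-bit form, index bits [2:0] select the source lane and bit 3 selects between "a" (0) and "b" (1), per the pseudocode:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128h a = _mm_set1_ph((_Float16)1.0f);
        __m128h b = _mm_set1_ph((_Float16)2.0f);
        /* indices 0-7 hit "a", 8-15 hit "b"; alternate between the two */
        __m128i idx = _mm_set_epi16(8, 0, 9, 1, 10, 2, 11, 3);
        __m128h r = _mm_permutex2var_ph(a, idx, b);
        _Float16 buf[8];
        _mm_storeu_ph(buf, r);
        printf("%f %f\n", (float)buf[0], (float)buf[1]);  /* 1.000000 2.000000 */
        return 0;
    }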
+ + + + + + Blend packed half-precision (16-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := b.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed half-precision (16-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 7 + IF k[j] + dst.fp16[j] := b.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
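A sketch of the blend entries above, assuming the standard Intel name _mm256_mask_blend_ph; each mask bit picks the lane from "b" (set) or "a" (clear):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m256h a = _mm256_set1_ph((_Float16)1.0f);
        __m256h b = _mm256_set1_ph((_Float16)2.0f);
        __m256h r = _mm256_mask_blend_ph(0xAAAA, a, b);  /* odd lanes from b */
        _Float16 buf[16];
        _mm256_storeu_ph(buf, r);
        printf("%f %f\n", (float)buf[0], (float)buf[1]); /* 1.000000 2.000000 */
        return 0;
    }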
+ + + + + Shuffle half-precision (16-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*16 + id := idx[i+3:i] + dst.fp16[j] := a.fp16[id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle half-precision (16-bit) floating-point elements in "a" using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + id := idx[i+2:i] + dst.fp16[j] := a.fp16[id] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Miscellaneous +
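A sketch of the single-source shuffle entries above, assuming the standard Intel name _mm_permutexvar_ph; here the index vector reverses the lanes:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        _Float16 src[8] = {0, 1, 2, 3, 4, 5, 6, 7};
        __m128h a = _mm_loadu_ph(src);
        __m128i idx = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);  /* lane j <- 7-j */
        __m128h r = _mm_permutexvar_ph(idx, a);
        _Float16 buf[8];
        _mm_storeu_ph(buf, r);
        printf("%f %f\n", (float)buf[0], (float)buf[7]);      /* 7.000000 0.000000 */
        return 0;
    }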
+ + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
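A sketch of the reciprocal-square-root entries above, assuming the standard Intel name _mm_rsqrt_ph; the result is an approximation with maximum relative error below 1.5*2^-12, per the entries:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128h a = _mm_set1_ph((_Float16)16.0f);
        __m128h r = _mm_rsqrt_ph(a);      /* ~1/sqrt(16) = 0.25 */
        _Float16 buf[8];
        _mm_storeu_ph(buf, r);
        printf("%f\n", (float)buf[0]);    /* ~0.250000 */
        return 0;
    }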
+ + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR i := 0 to 7 + dst.fp16[i] := SQRT(a.fp16[i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR i := 0 to 15 + dst.fp16[i] := SQRT(a.fp16[i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
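A sketch of the square-root entries above, using the writemask form to show the merge semantics (standard Intel name _mm256_mask_sqrt_ph assumed):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m256h a   = _mm256_set1_ph((_Float16)9.0f);
        __m256h src = _mm256_set1_ph((_Float16)-1.0f);
        __m256h r   = _mm256_mask_sqrt_ph(src, 0x000F, a);  /* lanes 0-3: sqrt, rest: src */
        _Float16 buf[16];
        _mm256_storeu_ph(buf, r);
        printf("%f %f\n", (float)buf[0], (float)buf[4]);    /* 3.000000 -1.000000 */
        return 0;
    }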
+ + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + dst.fp16[i] := (1.0 / a.fp16[i]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 7 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + dst.fp16[i] := (1.0 / a.fp16[i]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 15 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Elementary Math Functions +
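A sketch of the reciprocal entries above, assuming the standard Intel name _mm256_rcp_ph; again an approximation with maximum relative error below 1.5*2^-12:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m256h a = _mm256_set1_ph((_Float16)8.0f);
        __m256h r = _mm256_rcp_ph(a);     /* ~1/8 */
        _Float16 buf[16];
        _mm256_storeu_ph(buf, r);
        printf("%f\n", (float)buf[0]);    /* ~0.125000 */
        return 0;
    }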
+ + + + Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[255:0] := MEM[mem_addr+255:mem_addr] +dst[MAX:256] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] +dst[MAX:128] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Load +
+ + + + + Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+255:mem_addr] := a[255:0] + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Store +
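A combined sketch of the load and store entries above, assuming the standard Intel names _mm256_load_ph (aligned) and _mm256_storeu_ph (unaligned); the aligned forms require the 32-byte (256-bit) or 16-byte (128-bit) boundary named in the entries:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        _Alignas(32) _Float16 in[16];     /* satisfies the 32-byte requirement */
        _Float16 out[16];
        for (int i = 0; i < 16; i++) in[i] = (_Float16)i;
        __m256h v = _mm256_load_ph(in);   /* aligned load */
        _mm256_storeu_ph(out, v);         /* unaligned store works on any address */
        printf("%f\n", (float)out[15]);   /* prints 15.000000 */
        return 0;
    }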
+ + + + Return vector of type __m256h with undefined elements. + AVX512_FP16 + AVX512VL +
immintrin.h
+ General Support +
+ + + + Return vector of type __m128h with undefined elements. + AVX512_FP16 + AVX512VL +
immintrin.h
+ General Support +
+ + + + Return vector of type __m256h with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Set +
+ + + + Return vector of type __m128h with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512_FP16 + AVX512VL +
immintrin.h
+ Set +
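A sketch of the zero/undefined entries above, assuming the standard Intel names _mm256_setzero_ph and _mm256_undefined_ph:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m256h z256 = _mm256_setzero_ph();   /* all 16 fp16 lanes = 0.0 */
        __m128h z128 = _mm_setzero_ph();      /* all 8 fp16 lanes = 0.0  */
        /* _mm256_undefined_ph()/_mm_undefined_ph() return unspecified bits and
           are only useful as don't-care inputs that will be fully overwritten. */
        _Float16 buf[16];
        _mm256_storeu_ph(buf, z256);
        _mm_storeu_ph(buf, z128);
        printf("%f\n", (float)buf[0]);        /* 0.000000 */
        return 0;
    }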
+ + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 TO 31 + dst.fp16[j] := a.fp16[j] + b.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". + [round_note] + +FOR j := 0 TO 31 + dst.fp16[j] := a.fp16[j] + b.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := a.fp16[j] + b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := a.fp16[0] + b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := a.fp16[0] + b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] + b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] + b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] + b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] + b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
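A combined sketch of the addition entries above: the 512-bit packed form, the embedded-rounding form, and the scalar lower-element form. The standard Intel names (_mm512_add_ph, _mm512_add_round_ph, _mm_add_sh, _mm512_cvtsh_h) are assumptions, as is -mavx512fp16:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512h a = _mm512_set1_ph((_Float16)1.5f);
        __m512h b = _mm512_set1_ph((_Float16)2.0f);
        __m512h s = _mm512_add_ph(a, b);   /* 3.5 in all 32 lanes */
        __m512h r = _mm512_add_round_ph(a, b,
                        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        /* scalar form: lower element added, upper 7 copied from "a" */
        __m128h x = _mm_add_sh(_mm512_castph512_ph128(a), _mm512_castph512_ph128(b));
        printf("%f %f %f\n", (float)_mm512_cvtsh_h(s), (float)_mm512_cvtsh_h(r),
               (float)_mm_cvtsh_h(x));     /* 3.500000 three times */
        return 0;
    }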
+ + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 31 + dst.fp16[j] := a.fp16[j] / b.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + dst.fp16[j] := a.fp16[j] / b.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := a.fp16[j] / b.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := a.fp16[0] / b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] / b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := a.fp16[0] / b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := a.fp16[0] / b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] / b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := a.fp16[0] / b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
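A sketch of the division entries above, using the zeromask form (standard Intel name _mm512_maskz_div_ph assumed):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512h a = _mm512_set1_ph((_Float16)1.0f);
        __m512h b = _mm512_set1_ph((_Float16)3.0f);
        __m512h q = _mm512_maskz_div_ph(0x0000FFFF, a, b);  /* lanes 16-31 zeroed */
        printf("%f\n", (float)_mm512_cvtsh_h(q));  /* ~0.333252 at fp16 precision */
        return 0;
    }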
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
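A sketch of the fused multiply-add entries above. As the entries describe, the three masked forms differ only in the fallback lane source (mask copies from "a", mask3 from "c", maskz writes zero); the standard Intel names _mm512_fmadd_ph and _mm512_mask_fmadd_ph are assumed:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512h a = _mm512_set1_ph((_Float16)2.0f);
        __m512h b = _mm512_set1_ph((_Float16)3.0f);
        __m512h c = _mm512_set1_ph((_Float16)1.0f);
        __m512h r  = _mm512_fmadd_ph(a, b, c);             /* a*b + c = 7.0      */
        __m512h rm = _mm512_mask_fmadd_ph(a, 0x00000001,   /* lane 0 computed,   */
                                          b, c);           /* other lanes <- a   */
        printf("%f %f\n", (float)_mm512_cvtsh_h(r), (float)_mm512_cvtsh_h(rm));
        return 0;                                          /* 7.000000 7.000000  */
    }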
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 31 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := a.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := c.fp16[0] +FI +dst[127:16] := c[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
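A sketch of the negated-multiply-add entries above (standard Intel name _mm512_fnmadd_ph assumed); the product is negated before the addend is applied:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512h a = _mm512_set1_ph((_Float16)2.0f);
        __m512h b = _mm512_set1_ph((_Float16)3.0f);
        __m512h c = _mm512_set1_ph((_Float16)10.0f);
        __m512h r = _mm512_fnmadd_ph(a, b, c);     /* -(a*b) + c = 4.0 */
        printf("%f\n", (float)_mm512_cvtsh_h(r));  /* prints 4.000000 */
        return 0;
    }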
+ + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + [round_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := c.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst".
+
+dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0]
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+Masked forms: with writemask "k", dst.fp16[0] is computed only when mask bit 0 is set and is otherwise copied from "a", or, in the second writemask form, from "c" (that form also copies dst[127:16] from "c"); with zeromask "k", dst.fp16[0] is zeroed out when mask bit 0 is not set. Each of these forms is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
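A sketch of the unmasked scalar form, assuming the usual AVX512-FP16 naming (_mm_fmsub_sh; inferred, since the name field is missing here):

#include <immintrin.h>

/* Sketch: scalar FMSUB on the low fp16 element only; the upper seven
   elements of the result are copied from "a". Compile with -mavx512fp16. */
__m128h fmsub_low(__m128h a, __m128h b, __m128h c) {
    return _mm_fmsub_sh(a, b, c); /* dst[0] = a[0]*b[0] - c[0] */
}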
+Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".
+
+FOR j := 0 to 31
+  dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j]
+ENDFOR
+dst[MAX:512] := 0
+
+Masked forms: with writemask "k", dst.fp16[j] is computed only when k[j] is set and is otherwise copied from "a" or, in the second writemask form, from "c"; with zeromask "k", it is zeroed out when k[j] is not set. Each form is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
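A sketch of the unmasked packed form, assuming the name _mm512_fnmsub_ph (inferred):

#include <immintrin.h>

/* Sketch: packed FNMSUB, dst = -(a*b) - c in every fp16 lane.
   Compile with -mavx512fp16. */
__m512h fnmsub_all(__m512h a, __m512h b, __m512h c) {
    return _mm512_fnmsub_ph(a, b, c);
}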
+Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst".
+
+dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0]
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+Masked forms: with writemask "k", dst.fp16[0] is computed only when mask bit 0 is set and is otherwise copied from "a", or, in the second writemask form, from "c" (that form also copies dst[127:16] from "c"); with zeromask "k", dst.fp16[0] is zeroed out when mask bit 0 is not set. Each of these forms is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
+Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".
+
+FOR j := 0 to 31
+  IF ((j & 1) == 0)
+    dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
+  ELSE
+    dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
+  FI
+ENDFOR
+dst[MAX:512] := 0
+
+Masked forms: with writemask "k", dst.fp16[j] is computed only when k[j] is set and is otherwise copied from "a" or, in the second writemask form, from "c"; with zeromask "k", it is zeroed out when k[j] is not set. Each form is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
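A sketch of the unmasked form, assuming the name _mm512_fmaddsub_ph (inferred):

#include <immintrin.h>

/* Sketch: even lanes get a*b - c, odd lanes get a*b + c, matching the
   (j & 1) alternation in the pseudocode above. Compile with -mavx512fp16. */
__m512h fmaddsub_all(__m512h a, __m512h b, __m512h c) {
    return _mm512_fmaddsub_ph(a, b, c);
}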
+Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst".
+
+FOR j := 0 to 31
+  IF ((j & 1) == 0)
+    dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
+  ELSE
+    dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
+  FI
+ENDFOR
+dst[MAX:512] := 0
+
+Masked forms: with writemask "k", dst.fp16[j] is computed only when k[j] is set and is otherwise copied from "a" or, in the second writemask form, from "c"; with zeromask "k", it is zeroed out when k[j] is not set. Each form is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
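A sketch of the unmasked form, assuming the name _mm512_fmsubadd_ph (inferred):

#include <immintrin.h>

/* Sketch: the mirror image of fmaddsub above - even lanes get a*b + c,
   odd lanes get a*b - c. Compile with -mavx512fp16. */
__m512h fmsubadd_all(__m512h a, __m512h b, __m512h c) {
    return _mm512_fmsubadd_ph(a, b, c);
}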
+Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".
+
+FOR j := 0 TO 31
+  dst.fp16[j] := a.fp16[j] - b.fp16[j]
+ENDFOR
+dst[MAX:512] := 0
+
+Masked forms: with writemask "k", elements are copied from "src" when the corresponding mask bit is not set; with zeromask "k", they are zeroed out. Each form is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
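A sketch of the plain and write-masked forms, assuming the names _mm512_sub_ph and _mm512_mask_sub_ph (inferred):

#include <immintrin.h>

/* Sketch: lanewise fp16 subtraction. Compile with -mavx512fp16. */
__m512h sub_all(__m512h a, __m512h b) {
    return _mm512_sub_ph(a, b); /* every lane: a - b */
}

/* Write-masked form: lanes with a clear mask bit are taken from "src". */
__m512h sub_masked(__m512h src, __mmask32 k, __m512h a, __m512h b) {
    return _mm512_mask_sub_ph(src, k, a, b);
}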
+Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst".
+
+dst.fp16[0] := a.fp16[0] - b.fp16[0]
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+Masked forms: with writemask "k", dst.fp16[0] is copied from "src" when mask bit 0 is not set; with zeromask "k", it is zeroed out. Each form is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
+Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst".
+
+FOR i := 0 TO 31
+  dst.fp16[i] := a.fp16[i] * b.fp16[i]
+ENDFOR
+dst[MAX:512] := 0
+
+Masked forms: with writemask "k", elements are copied from "src" when the corresponding mask bit is not set; with zeromask "k", they are zeroed out. Each form is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
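A sketch of the plain and zero-masked forms, assuming the names _mm512_mul_ph and _mm512_maskz_mul_ph (inferred):

#include <immintrin.h>

/* Sketch: lanewise fp16 multiply. Compile with -mavx512fp16. */
__m512h mul_all(__m512h a, __m512h b) {
    return _mm512_mul_ph(a, b);
}

/* Zero-masked form: lanes with a clear mask bit become 0. */
__m512h mul_masked_z(__mmask32 k, __m512h a, __m512h b) {
    return _mm512_maskz_mul_ph(k, a, b);
}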
+Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst".
+
+dst.fp16[0] := a.fp16[0] * b.fp16[0]
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+Masked forms: with writemask "k", dst.fp16[0] is copied from "src" when mask bit 0 is not set; with zeromask "k", it is zeroed out. Each form is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
+Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
+
+FOR i := 0 to 15
+  dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1])
+  dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1])
+ENDFOR
+dst[MAX:512] := 0
+
+Masked forms: with writemask "k", both halves of complex number "i" are copied from "src" when mask bit "i" is not set; with zeromask "k", they are zeroed out. Each form is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
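A sketch of the unmasked form, assuming the name _mm512_fmul_pch (inferred; this operation also exists under the alternate spelling _mm512_mul_pch, which may be why these entries appear twice):

#include <immintrin.h>

/* Sketch: 16 complex fp16 multiplies per vector; each complex value
   occupies an adjacent (real, imaginary) fp16 pair. -mavx512fp16. */
__m512h cmul_all(__m512h a, __m512h b) {
    return _mm512_fmul_pch(a, b);
}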
+Multiply the lower complex numbers in "a" and "b", store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
+
+dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1])
+dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+
+Masked forms: with writemask "k", both lower elements are copied from "src" when mask bit 0 is not set; with zeromask "k", they are zeroed out. Each form is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
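A sketch of the unmasked scalar form, assuming the name _mm_fmul_sch (inferred):

#include <immintrin.h>

/* Sketch: complex multiply of the low (real, imag) fp16 pair only;
   the upper six elements are copied from "a". -mavx512fp16. */
__m128h cmul_low(__m128h a, __m128h b) {
    return _mm_fmul_sch(a, b);
}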
+Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
+
+FOR i := 0 to 15
+  dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1])
+  dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1])
+ENDFOR
+dst[MAX:512] := 0
+
+Masked forms: with writemask "k", both halves of complex number "i" are copied from "src" when mask bit "i" is not set; with zeromask "k", they are zeroed out. Each form is also available with rounding control ([round_note]).
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
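A sketch of the unmasked form, assuming the name _mm512_fcmul_pch (inferred; an alternate spelling _mm512_cmul_pch also exists):

#include <immintrin.h>

/* Sketch: multiply each complex fp16 pair in "a" by the conjugate of the
   corresponding pair in "b", i.e. a * conj(b). -mavx512fp16. */
__m512h fcmul_all(__m512h a, __m512h b) {
    return _mm512_fcmul_pch(a, b);
}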
+Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
+
+dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1])
+dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+
+Masked forms: with writemask "k", both lower elements are copied from "src" when mask bit 0 is not set; with zeromask "k", they are zeroed out.
+
+CPUID: AVX512_FP16; header: immintrin.h; category: Arithmetic
+ + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := src.fp16[0] + dst.fp16[1] := src.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := src.fp16[0] + dst.fp16[1] := src.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
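A usage sketch for the lower-lane (scalar) variants above, assuming an AVX512-FP16 toolchain (-mavx512fp16) and the _mm_fcmul_sch / _mm_maskz_fcmul_sch spellings found in recent immintrin.h:

#include <immintrin.h>

/* Lower lane: a[0..1] * conj(b[0..1]); fp16 elements 2..7 of "a"
   (the "upper 6 packed elements") pass through to the result. */
__m128h lower_conj_mul(__m128h a, __m128h b)
{
    return _mm_fcmul_sch(a, b);
}

/* Zero-masked form: bit 0 of "k" gates the lower complex result. */
__m128h lower_conj_mul_z(__mmask8 k, __m128h a, __m128h b)
{
    return _mm_maskz_fcmul_sch(k, a, b);
}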
+ + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
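The accumulate form above is the standard complex multiply-add dst = a*b + c. Per-lane plain-C model (float in place of fp16; real hardware rounds each fp16 operation):

/* dst = a*b + c for one complex lane, as in the pseudocode above. */
static void cfmadd_ref(const float a[2], const float b[2],
                       const float c[2], float dst[2])
{
    dst[0] = a[0]*b[0] - a[1]*b[1] + c[0];  /* real part */
    dst[1] = a[1]*b[0] + a[0]*b[1] + c[1];  /* imaginary part */
}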
+ Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
+
+FOR i := 0 to 15
+	IF k[i]
+		dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
+		dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
+	ELSE
+		dst.fp16[2*i+0] := a.fp16[2*i+0]
+		dst.fp16[2*i+1] := a.fp16[2*i+1]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+ AVX512_FP16
immintrin.h
+ Arithmetic +
+ Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
+
+FOR i := 0 to 15
+	IF k[i]
+		dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
+		dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
+	ELSE
+		dst.fp16[2*i+0] := c.fp16[2*i+0]
+		dst.fp16[2*i+1] := c.fp16[2*i+1]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+ AVX512_FP16
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
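The three masked flavours above differ only in what a masked-off lane receives: the first operand, the accumulator, or zero. A sketch, assuming the _mm512_mask_fmadd_pch / _mm512_mask3_fmadd_pch / _mm512_maskz_fmadd_pch spellings from recent immintrin.h (one mask bit per complex pair, hence __mmask16 for 16 pairs):

#include <immintrin.h>

__m512h fma_mask(__m512h a, __mmask16 k, __m512h b, __m512h c)
{ return _mm512_mask_fmadd_pch(a, k, b, c); }   /* masked-off lanes keep a */

__m512h fma_mask3(__m512h a, __m512h b, __m512h c, __mmask16 k)
{ return _mm512_mask3_fmadd_pch(a, b, c, k); }  /* masked-off lanes keep c */

__m512h fma_maskz(__mmask16 k, __m512h a, __m512h b, __m512h c)
{ return _mm512_maskz_fmadd_pch(k, a, b, c); }  /* masked-off lanes are 0 */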
+ + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := a.fp16[0] + dst.fp16[1] := a.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := c.fp16[0] + dst.fp16[1] := c.fp16[1] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := a.fp16[0] + dst.fp16[1] := a.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := c.fp16[0] + dst.fp16[1] := c.fp16[1] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
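Where [round_note] appears, the intrinsic takes an explicit rounding operand instead of using the MXCSR rounding mode. A sketch, assuming the _mm_fmadd_round_sch spelling from recent immintrin.h:

#include <immintrin.h>

/* Lower-lane complex fused multiply-add with round-toward-zero and
   suppressed exceptions. */
__m128h fmadd_sch_rz(__m128h a, __m128h b, __m128h c)
{
    return _mm_fmadd_round_sch(a, b, c,
                               _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}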
+ + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := a.fp16[2*i+0] + dst.fp16[2*i+1] := a.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := c.fp16[2*i+0] + dst.fp16[2*i+1] := c.fp16[2*i+1] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +FOR i := 0 to 15 + IF k[i] + dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] + dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] + ELSE + dst.fp16[2*i+0] := 0 + dst.fp16[2*i+1] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
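Accumulating a * conj(b) is the building block of a complex dot product (the sum of a[i] * conj(b[i])). Plain-C model of what the conjugate multiply-accumulate loop above computes per lane (float in place of fp16; illustrative, not a library function):

/* acc += sum over i of a[i] * conj(b[i]); arrays are interleaved (re, im). */
static void cdot_ref(const float *a, const float *b, int n_complex,
                     float acc[2])
{
    for (int i = 0; i < n_complex; i++) {
        acc[0] += a[2*i+0]*b[2*i+0] + a[2*i+1]*b[2*i+1];  /* real */
        acc[1] += a[2*i+1]*b[2*i+0] - a[2*i+0]*b[2*i+1];  /* imaginary */
    }
}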
+ + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := a.fp16[0] + dst.fp16[1] := a.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := c.fp16[0] + dst.fp16[1] := c.fp16[1] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] +dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := a.fp16[0] + dst.fp16[1] := a.fp16[1] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := c.fp16[0] + dst.fp16[1] := c.fp16[1] +FI +dst[127:32] := c[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + [round_note] + +IF k[0] + dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] + dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] +ELSE + dst.fp16[0] := 0 + dst.fp16[1] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ Reduce the packed half-precision (16-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a".
+
+tmp := a
+FOR i := 0 to 15
+	tmp.fp16[i] := tmp.fp16[i] + a.fp16[i+16]
+ENDFOR
+FOR i := 0 to 7
+	tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+8]
+ENDFOR
+FOR i := 0 to 3
+	tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+4]
+ENDFOR
+FOR i := 0 to 1
+	tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+2]
+ENDFOR
+dst.fp16[0] := tmp.fp16[0] + tmp.fp16[1]
+
+ AVX512_FP16
immintrin.h
+ Arithmetic +
+ Reduce the packed half-precision (16-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a".
+
+tmp := a
+FOR i := 0 to 15
+	tmp.fp16[i] := tmp.fp16[i] * a.fp16[i+16]
+ENDFOR
+FOR i := 0 to 7
+	tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+8]
+ENDFOR
+FOR i := 0 to 3
+	tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+4]
+ENDFOR
+FOR i := 0 to 1
+	tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+2]
+ENDFOR
+dst.fp16[0] := tmp.fp16[0] * tmp.fp16[1]
+
+ AVX512_FP16
immintrin.h
+ Arithmetic +
+ Reduce the packed half-precision (16-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". [max_float_note]
+
+tmp := a
+FOR i := 0 to 15
+	tmp.fp16[i] := (a.fp16[i] > a.fp16[i+16] ? a.fp16[i] : a.fp16[i+16])
+ENDFOR
+FOR i := 0 to 7
+	tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8])
+ENDFOR
+FOR i := 0 to 3
+	tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4])
+ENDFOR
+FOR i := 0 to 1
+	tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2])
+ENDFOR
+dst.fp16[0] := (tmp.fp16[0] > tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1])
+
+ AVX512_FP16
immintrin.h
+ Arithmetic +
+ Reduce the packed half-precision (16-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". [min_float_note]
+
+tmp := a
+FOR i := 0 to 15
+	tmp.fp16[i] := (a.fp16[i] < a.fp16[i+16] ? a.fp16[i] : a.fp16[i+16])
+ENDFOR
+FOR i := 0 to 7
+	tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8])
+ENDFOR
+FOR i := 0 to 3
+	tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4])
+ENDFOR
+FOR i := 0 to 1
+	tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2])
+ENDFOR
+dst.fp16[0] := (tmp.fp16[0] < tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1])
immintrin.h
+ Arithmetic +
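Note that the reductions above are five-level pairwise trees, not left-to-right loops, so under floating-point rounding the result can differ from a sequential sum. Plain-C model of the add reduction over 32 lanes (float in place of fp16; illustrative only):

static float reduce_add_tree(const float x[32])
{
    float t[32];
    for (int i = 0; i < 32; i++) t[i] = x[i];
    /* halve the active width each level: 16, 8, 4, 2, 1 */
    for (int w = 16; w >= 1; w /= 2)
        for (int i = 0; i < w; i++)
            t[i] = t[i] + t[i + w];
    return t[0];
}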
+ Finds the absolute value of each packed half-precision (16-bit) floating-point element in "v2", storing the results in "dst".
+
+FOR j := 0 to 31
+	dst.fp16[j] := ABS(v2.fp16[j])
+ENDFOR
+dst[MAX:512] := 0
+
+ AVX512_FP16
immintrin.h
+ Arithmetic +
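For IEEE fp16 the absolute value only clears the sign bit (bit 15), so ABS never rounds or raises exceptions. Bit-level C model:

#include <stdint.h>

static uint16_t fp16_abs_bits(uint16_t h)
{
    return h & 0x7fffu;  /* clear the sign bit */
}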
+ Compute the complex conjugates of complex numbers in "a", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
+
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] XOR FP32(-0.0)
+ENDFOR
+dst[MAX:512] := 0
+
+ AVX512_FP16
immintrin.h
+ Arithmetic +
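The XOR with FP32(-0.0) = 0x80000000 flips bit 31 of each 32-bit pair, i.e. only the sign of the odd-indexed (imaginary) fp16 element, which is exactly complex conjugation. Bit-level C model for one pair:

#include <stdint.h>

static uint32_t conj_pair_bits(uint32_t pair)
{
    return pair ^ 0x80000000u;  /* negate the imaginary half */
}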
+ + + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + AVX512_FP16 +
immintrin.h
+ Arithmetic +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 31 + k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
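Usage sketch for the predicate compare above, assuming the _mm512_cmp_ph_mask spelling from recent immintrin.h; "imm8" is one of the 32 _CMP_* constants and the result holds one mask bit per fp16 lane:

#include <immintrin.h>

__mmask32 lanes_less_than(__m512h a, __m512h b)
{
    return _mm512_cmp_ph_mask(a, b, _CMP_LT_OS);  /* ordered, signaling */
}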
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 31 + IF k1[j] + k[j] := ( a.fp16[j] OP b.fp16[j] ) ? 1 : 0 + ELSE + k[j] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +FOR j := 0 to 31 + k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0 +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]
+
+CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ
+26: OP := _CMP_NGT_UQ
+27: OP := _CMP_FALSE_OS
+28: OP := _CMP_NEQ_OS
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 31
+	IF k1[j]
+		k[j] := ( a.fp16[j] OP b.fp16[j] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+
+ AVX512_FP16
immintrin.h
+ Compare +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := (a.fp16[0] OP b.fp16[0]) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +k[0] := (a.fp16[0] OP b.fp16[0]) ? 1 : 0 +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +IF k1[0] + k[0] := ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0 +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +RETURN ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note] + CASE (imm8[4:0]) OF +0: OP := _CMP_EQ_OQ +1: OP := _CMP_LT_OS +2: OP := _CMP_LE_OS +3: OP := _CMP_UNORD_Q +4: OP := _CMP_NEQ_UQ +5: OP := _CMP_NLT_US +6: OP := _CMP_NLE_US +7: OP := _CMP_ORD_Q +8: OP := _CMP_EQ_UQ +9: OP := _CMP_NGE_US +10: OP := _CMP_NGT_US +11: OP := _CMP_FALSE_OQ +12: OP := _CMP_NEQ_OQ +13: OP := _CMP_GE_OS +14: OP := _CMP_GT_OS +15: OP := _CMP_TRUE_UQ +16: OP := _CMP_EQ_OS +17: OP := _CMP_LT_OQ +18: OP := _CMP_LE_OQ +19: OP := _CMP_UNORD_S +20: OP := _CMP_NEQ_US +21: OP := _CMP_NLT_UQ +22: OP := _CMP_NLE_UQ +23: OP := _CMP_ORD_S +24: OP := _CMP_EQ_US +25: OP := _CMP_NGE_UQ +26: OP := _CMP_NGT_UQ +27: OP := _CMP_FALSE_OS +28: OP := _CMP_NEQ_OS +29: OP := _CMP_GE_OQ +30: OP := _CMP_GT_OQ +31: OP := _CMP_TRUE_US +ESAC +RETURN ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for equality, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] == b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] < b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] <= b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] > b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] >= b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for not-equal, and return the boolean result (0 or 1). + RETURN ( a.fp16[0] ==NaN OR b.fp16[0] ==NaN OR a.fp16[0] != b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] == b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] < b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] <= b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] > b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] >= b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
+ + + + + Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a.fp16[0] ==NaN OR b.fp16[0] ==NaN OR a.fp16[0] != b.fp16[0] ) ? 1 : 0 + + + AVX512_FP16 +
immintrin.h
+ Compare +
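The comi set above signals an invalid-operation exception on QNaN inputs, while the ucomi set stays quiet; otherwise they test the same relations. A sketch, assuming the _mm_comilt_sh / _mm_ucomilt_sh spellings from recent immintrin.h:

#include <immintrin.h>
#include <stdio.h>

void compare_lanes(__m128h a, __m128h b)
{
    if (_mm_comilt_sh(a, b))    /* signals on QNaN operands */
        puts("a[0] < b[0]");
    if (_mm_ucomilt_sh(a, b))   /* quiet on QNaN operands */
        puts("a[0] < b[0] (quiet compare)");
}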
+ + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 31 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
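fp16 has an 11-bit significand, so 16-bit integers above 2048 in magnitude convert inexactly and the rounding mode matters; under round-to-nearest, 32767 becomes 32768.0. A sketch, assuming the _mm512_cvtepi16_ph spelling from recent immintrin.h:

#include <immintrin.h>

/* 32 signed words -> 32 fp16 values. */
__m512h widen_words(__m512i words)
{
    return _mm512_cvtepi16_ph(words);
}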
+ + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 31 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 31 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 31 + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 TO 31
+	IF k[j]
+		dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j])
+	ELSE
+		dst.fp16[j] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+	[round_note] +
+FOR j := 0 TO 31
+	IF k[j]
+		dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j])
+	ELSE
+		dst.fp16[j] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 15 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 15 + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". +
+FOR j := 0 TO 15
+	dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". +
+	[round_note] +
+FOR j := 0 TO 15
+	dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+FOR j := 0 TO 15
+	IF k[j]
+		dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
+	ELSE
+		dst.fp16[j] := src.fp16[j]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+	[round_note] +
+FOR j := 0 TO 15
+	IF k[j]
+		dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
+	ELSE
+		dst.fp16[j] := src.fp16[j]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 TO 15
+	IF k[j]
+		dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
+	ELSE
+		dst.fp16[j] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+	[round_note] +
+FOR j := 0 TO 15
+	IF k[j]
+		dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
+	ELSE
+		dst.fp16[j] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
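Because 16 dword sources yield only 16 half-precision results, the 32-bit families above narrow a 512-bit integer vector into a 256-bit half vector. A sketch, again with assumed names:

// Sketch: the names _mm512_cvtepi32_ph / _mm512_cvtepu32_ph are assumed.
#include <immintrin.h>

__m256h dwords_to_halves_signed(__m512i v)   { return _mm512_cvtepi32_ph(v); }
__m256h dwords_to_halves_unsigned(__m512i v) { return _mm512_cvtepu32_ph(v); }

// The signed/unsigned distinction matters for the same bit pattern:
// 0xFFFFFFFF converts to -1.0 through the signed path, but through the
// unsigned path it is 4294967295, far above fp16's largest finite value
// of 65504, so round-to-nearest overflows it to +Inf.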
+ + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". +
+FOR j := 0 TO 7
+	dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". +
+	[round_note] +
+FOR j := 0 TO 7
+	dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+FOR j := 0 TO 7
+	IF k[j]
+		dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
+	ELSE
+		dst.fp16[j] := src.fp16[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+	[round_note] +
+FOR j := 0 TO 7
+	IF k[j]
+		dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
+	ELSE
+		dst.fp16[j] := src.fp16[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+FOR j := 0 TO 7
+	IF k[j]
+		dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
+	ELSE
+		dst.fp16[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+	[round_note] +
+FOR j := 0 TO 7
+	IF k[j]
+		dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
+	ELSE
+		dst.fp16[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
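The 64-bit families narrow further, eight qwords into a 128-bit half vector, and the [round_note] variants take the rounding mode as an immediate instead of consulting MXCSR. A sketch under the same naming assumption:

// Sketch: _mm512_cvtepi64_ph and _mm512_cvt_roundepi64_ph are assumed names.
#include <immintrin.h>

__m128h qwords_to_halves(__m512i v) {
    return _mm512_cvtepi64_ph(v);        // rounds per the current MXCSR mode
}

__m128h qwords_to_halves_toward_zero(__m512i v) {
    // Embedded rounding: round toward zero, suppress exceptions.
    return _mm512_cvt_roundepi64_ph(v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}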
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 TO 7 + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
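The double-to-half entries follow the same shape: eight fp64 lanes narrow into a 128-bit half vector. A sketch with assumed names:

// Sketch: _mm512_cvtpd_ph and _mm512_mask_cvtpd_ph are assumed names.
#include <immintrin.h>

__m128h doubles_to_halves(__m512d v) { return _mm512_cvtpd_ph(v); }

__m128h doubles_to_halves_masked(__m128h src, __mmask8 k, __m512d v) {
    return _mm512_mask_cvtpd_ph(src, k, v);  // unselected fp16 lanes keep src
}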
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+	[round_note] +
+dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+IF k[0]
+	dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
+ELSE
+	dst.fp16[0] := src.fp16[0]
+FI
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+	[round_note] +
+IF k[0]
+	dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
+ELSE
+	dst.fp16[0] := src.fp16[0]
+FI
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+IF k[0]
+	dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
+ELSE
+	dst.fp16[0] := 0
+FI
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+	[round_note] +
+IF k[0]
+	dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
+ELSE
+	dst.fp16[0] := 0
+FI
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
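The scalar form converts only lane 0; the remaining destination lanes are passed through from the first half-precision operand, which is why even the unmasked entry takes two sources. A sketch with an assumed name:

// Sketch: _mm_cvtsd_sh is an assumed name.
#include <immintrin.h>

__m128h convert_low_double(__m128h upper, __m128d d) {
    // dst.fp16[0] = (fp16)d.fp64[0]; dst[127:16] is copied from 'upper'
    return _mm_cvtsd_sh(upper, d);
}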
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_note] + +FOR j := 0 to 15 + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
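These float-to-half entries presumably correspond to the AVX512-FP16 instruction VCVTPS2PHX; the intrinsic is assumed to be _mm512_cvtxps_ph (note the x), since the older F16C-style _mm512_cvtps_ph already exists and returns raw half bits in a __m256i rather than a typed __m256h:

// Sketch: _mm512_cvtxps_ph is an assumed name (typed __m256h result).
#include <immintrin.h>

__m256h floats_to_halves(__m512 v) { return _mm512_cvtxps_ph(v); }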
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+	[round_note] +
+dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+IF k[0]
+	dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
+ELSE
+	dst.fp16[0] := src.fp16[0]
+FI
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+	[round_note] +
+IF k[0]
+	dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
+ELSE
+	dst.fp16[0] := src.fp16[0]
+FI
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+IF k[0]
+	dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
+ELSE
+	dst.fp16[0] := 0
+FI
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
+	[round_note] +
+IF k[0]
+	dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
+ELSE
+	dst.fp16[0] := 0
+FI
+dst[127:16] := a[127:16]
+dst[MAX:128] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
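The single-precision scalar variants mirror the double-precision ones; the zeromask sketch below spells out the mask-bit-0 behaviour of the last entry. Names are assumed:

// Sketch: _mm_maskz_cvtss_sh is an assumed name.
#include <immintrin.h>

__m128h convert_low_float_maskz(__mmask8 k, __m128h a, __m128 b) {
    // k bit 0 set:   dst.fp16[0] = (fp16)b.fp32[0]
    // k bit 0 clear: dst.fp16[0] = 0.0
    // Either way dst[127:16] comes from 'a'.
    return _mm_maskz_cvtss_sh(k, a, b);
}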
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
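Going the other way, sixteen half-precision lanes widen into sixteen dwords, so a 256-bit half vector fills a 512-bit integer vector, and the [round_note] form overrides the MXCSR rounding mode per call. A sketch with assumed names:

// Sketch: _mm512_cvtph_epi32 / _mm512_cvt_roundph_epi32 are assumed names.
#include <immintrin.h>

__m512i halves_to_ints(__m256h v) {
    return _mm512_cvtph_epi32(v);  // rounding controlled by MXCSR
}

__m512i halves_to_ints_nearest(__m256h v) {
    return _mm512_cvt_roundph_epi32(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}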
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
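The truncating forms always round toward zero, so their optional immediate can only suppress exceptions; that is why these entries carry [sae_note] where the non-truncating ones carry [round_note]. A sketch contrasting the two, names assumed:

// Sketch: for an input lane holding 2.7, the default convert yields 3
// (round to nearest) while the truncating convert yields 2.
#include <immintrin.h>

void convert_both(__m256h v, __m512i *rounded, __m512i *truncated) {
    *rounded   = _mm512_cvtph_epi32(v);   // nearest-even under default MXCSR
    *truncated = _mm512_cvttph_epi32(v);  // always toward zero
}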
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 15 + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 15 + IF k[j] + dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 7 + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := src.qword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 7 + IF k[j] + dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j]) + ELSE + dst.qword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
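For the 64-bit destinations above, only eight results fit in a 512-bit register, so these forms read just a 128-bit half vector. A sketch with assumed names:

// Sketch: _mm512_cvtph_epi64 / _mm512_cvttph_epu64 are assumed names.
#include <immintrin.h>

__m512i halves_to_qwords(__m128h v)            { return _mm512_cvtph_epi64(v); }
__m512i halves_to_uqwords_truncated(__m128h v) { return _mm512_cvttph_epu64(v); }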
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". + [round_note] + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst". + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst". +
+	[round_note] +
+FOR j := 0 TO 31
+	dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j])
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
+	[round_note] +
+FOR j := 0 TO 31
+	IF k[j]
+		dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j])
+	ELSE
+		dst.word[j] := src.word[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). +
+	[round_note] +
+FOR j := 0 TO 31
+	IF k[j]
+		dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j])
+	ELSE
+		dst.word[j] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+
+
+ AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst". + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst". [sae_note] + +FOR j := 0 TO 31 + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := src.word[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 TO 31 + IF k[j] + dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j]) + ELSE + dst.word[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
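The 16-bit destinations are the only same-width case, so all 32 lanes survive and both operands are 512-bit. A masked sketch, names assumed:

// Sketch: _mm512_cvtph_epi16 and _mm512_mask_cvtph_epi16 are assumed names.
#include <immintrin.h>

__m512i halves_to_words(__m512h v) { return _mm512_cvtph_epi16(v); }

__m512i halves_to_words_masked(__m512i src, __mmask32 k, __m512h v) {
    return _mm512_mask_cvtph_epi16(src, k, v);  // unselected words keep src
}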
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". [sae_note] + +FOR j := 0 to 7 + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := src.fp64[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := src.fp64[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 7 + IF k[j] + dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j]) + ELSE + dst.fp64[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 15 + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". [sae_note] + +FOR j := 0 to 15 + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := src.fp32[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := src.fp32[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] + +FOR j := 0 to 15 + IF k[j] + dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) + ELSE + dst.fp32[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
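Every half-precision value is exactly representable in single and double precision, so the widening conversions above never round; that is presumably why they only offer a [sae_note] (suppress-all-exceptions) variant rather than a [round_note] one. A sketch with assumed names:

// Sketch: _mm512_cvtph_pd / _mm512_cvtxph_ps are assumed names; both
// widenings are lossless.
#include <immintrin.h>

__m512d halves_to_doubles(__m128h v) { return _mm512_cvtph_pd(v); }
__m512  halves_to_floats(__m256h v)  { return _mm512_cvtxph_ps(v); }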
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [sae_note] + +dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +ELSE + dst.fp64[0] := src.fp64[0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note] + +IF k[0] + dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +ELSE + dst.fp64[0] := src.fp64[0] +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". + +IF k[0] + dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +ELSE + dst.fp64[0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note] + +IF k[0] + dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0]) +ELSE + dst.fp64[0] := 0 +FI +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
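A minimal usage sketch for the unmasked fp16-to-fp64 form, assuming the conventional intrinsic names `_mm_cvtsh_sd` and `_mm_set_sh` and a toolchain with AVX512-FP16 enabled (e.g. gcc/clang with -mavx512fp16 on capable hardware):

```
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h b = _mm_set_sh((_Float16)1.5f); /* lower fp16 lane = 1.5   */
    __m128d a = _mm_set1_pd(2.0);           /* supplies the upper lane */
    double out[2];
    _mm_storeu_pd(out, _mm_cvtsh_sd(a, b)); /* out = { 1.5, 2.0 }      */
    printf("%f %f\n", out[0], out[1]);
    return 0;
}
```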
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note] + +dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +ELSE + dst.fp32[0] := src.fp32[0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note] + +IF k[0] + dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +ELSE + dst.fp32[0] := src.fp32[0] +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +ELSE + dst.fp32[0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + + Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note] + +IF k[0] + dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0]) +ELSE + dst.fp32[0] := 0 +FI +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst.dword := Convert_FP16_To_Int32(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + [round_note] + +dst.dword := Convert_FP16_To_Int32(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst.qword := Convert_FP16_To_Int64(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + [round_note] + +dst.qword := Convert_FP16_To_Int64(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst.dword := Convert_FP16_To_Int32_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". [sae_note] + +dst.dword := Convert_FP16_To_Int32_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst.qword := Convert_FP16_To_Int64_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". [sae_note] + +dst.qword := Convert_FP16_To_Int64_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + +dst.dword := Convert_FP16_To_UInt32(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". + [round_note] + +dst.dword := Convert_FP16_To_UInt32(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". + +dst.qword := Convert_FP16_To_UInt64(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". [round_note] + +dst.qword := Convert_FP16_To_UInt64(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". + +dst.dword := Convert_FP16_To_UInt32_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". [sae_note] + +dst.dword := Convert_FP16_To_UInt32_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". + +dst.qword := Convert_FP16_To_UInt64_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the lower half-precision (16-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". [sae_note] + +dst.qword := Convert_FP16_To_UInt64_Truncate(a.fp16[0]) + + + AVX512_FP16 +
immintrin.h
+ Convert +
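A short sketch contrasting the rounding and truncating scalar conversions above, assuming the conventional names `_mm_cvtsh_i32`/`_mm_cvttsh_i32` and `_mm_set_sh`:

```
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h a = _mm_set_sh((_Float16)2.75f);
    int r = _mm_cvtsh_i32(a);   /* rounds per MXCSR (default nearest): 3 */
    int t = _mm_cvttsh_i32(a);  /* truncates toward zero:              2 */
    printf("%d %d\n", r, t);
    return 0;
}
```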
+ + + + + Convert the signed 32-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := Convert_Int32_To_FP16(b) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 32-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := Convert_Int32_To_FP16(b) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 32-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := Convert_UInt32_To_FP16(b) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the unsigned 32-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := Convert_UInt32_To_FP16(b) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := Convert_Int64_To_FP16(b) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the signed 64-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := Convert_Int64_To_FP16(b) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + Convert the unsigned 64-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := Convert_UInt64_To_FP16(b) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + + + Convert the unsigned 64-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := Convert_UInt64_To_FP16(b) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
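A sketch of the integer-to-fp16 scalar insert, assuming the conventional name `_mm_cvti32_sh`:

```
#include <immintrin.h>

/* Lane 0 of the result is (fp16)b; the upper 7 fp16 lanes of 'a'
 * pass through unchanged, per the pseudocode above. */
__m128h widen_low(__m128h a, int b) {
    return _mm_cvti32_sh(a, b);
}
```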
+ + + + Copy 16-bit integer "a" to the lower element of "dst", and zero the upper elements of "dst". + +dst[15:0] := a[15:0] +dst[MAX:16] := 0 + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Copy the lower 16-bit integer in "a" to "dst". + +dst[15:0] := a[15:0] + + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Copy the lower half-precision (16-bit) floating-point element of "a" to "dst". + +dst[15:0] := a.fp16[0] + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Copy the lower half-precision (16-bit) floating-point element of "a" to "dst". + +dst[15:0] := a.fp16[0] + + AVX512_FP16 +
immintrin.h
+ Convert +
+ + + + Copy the lower half-precision (16-bit) floating-point element of "a" to "dst". + +dst[15:0] := a.fp16[0] + + AVX512_FP16 +
immintrin.h
+ Convert +
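A sketch of reading lane 0 as a native `_Float16`, assuming the conventional name `_mm_cvtsh_h` (the same accessor exists for the 256- and 512-bit sources documented above):

```
#include <immintrin.h>

/* Extract the lower fp16 lane as a scalar _Float16. */
_Float16 low_half(__m128h a) {
    return _mm_cvtsh_h(a);
}
```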
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [sae_note][max_float_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][max_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [sae_note] [min_float_note] + +FOR j := 0 to 31 + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := src.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note][min_float_note] + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) + ELSE + dst.fp16[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
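The [max_float_note]/[min_float_note] semantics follow directly from the `a > b ? a : b` select in the pseudocode; a plain-C model (float in place of fp16, name illustrative) makes the NaN and signed-zero behaviour explicit:

```
#include <math.h>

/* Reference model of the packed-max compare above: because the select
 * is a plain a > b ? a : b, if either input is NaN, or both inputs are
 * zero, the *second* operand is returned -- unlike IEEE maximumNumber. */
static float max_ph_ref(float a, float b) {
    return a > b ? a : b;
}
/* e.g. max_ph_ref(NAN, 1.0f) == 1.0f, max_ph_ref(1.0f, NAN) is NaN,
 * and max_ph_ref(+0.0f, -0.0f) == -0.0f. */
```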
+ + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +IF k[0] + dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +IF k[0] + dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +IF k[0] + dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
+ + + + + + + + Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +IF k[0] + dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Special Math Functions +
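A plain-C model of ReduceArgumentFP16 as defined above (float in place of fp16 for portability; only the round-to-nearest encoding of imm8[3:0] is modelled, via the default floating-point environment):

```
#include <math.h>
#include <stdint.h>

/* imm8[7:4] = number of fraction bits M to preserve,
 * imm8[3:0] = rounding mode (round-to-nearest assumed here). */
static float reduce_ref(float src, uint8_t imm8) {
    int m = imm8 >> 4;
    float tmp = ldexpf(nearbyintf(ldexpf(src, m)), -m); /* round-scale */
    float r = src - tmp;
    return isinf(r) ? 0.0f : r; /* IsInf clamp from the pseudocode */
}
/* reduce_ref(1.6f, 0x10) ~= 0.1f: 1.6 rounded to 1 fraction bit is
 * 1.5, and the reduced argument is the remainder. */
```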
+ + + + Load a half-precision (16-bit) floating-point element from memory into the lower element of "dst", and zero the upper elements. + +dst.fp16[0] := MEM[mem_addr].fp16[0] +dst[MAX:16] := 0 + + + AVX512_FP16 +
immintrin.h
+ Load +
+ + + + + + Load a half-precision (16-bit) floating-point element from memory into the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and set the upper elements of "dst" to zero. + +IF k[0] + dst.fp16[0] := MEM[mem_addr].fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[MAX:16] := 0 + + + AVX512_FP16 +
immintrin.h
+ Load +
+ + + + + Load a half-precision (16-bit) floating-point element from memory into the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and set the upper elements of "dst" to zero. + +IF k[0] + dst.fp16[0] := MEM[mem_addr].fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[MAX:16] := 0 + + + AVX512_FP16 +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Load +
+ + + + Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[511:0] := MEM[mem_addr+511:mem_addr] +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Load +
+ + + + + Store the lower half-precision (16-bit) floating-point element from "a" into memory. + +MEM[mem_addr].fp16[0] := a.fp16[0] + + + AVX512_FP16 +
immintrin.h
+ Store +
+ + + + + + Store the lower half-precision (16-bit) floating-point element from "a" into memory using writemask "k". + +IF k[0] + MEM[mem_addr].fp16[0] := a.fp16[0] +FI + + + AVX512_FP16 +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512_FP16 +
immintrin.h
+ Store +
+ + + + + Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+511:mem_addr] := a[511:0] + + + AVX512_FP16 +
immintrin.h
+ Store +
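A round-trip sketch of the load/store forms above, assuming the conventional names `_mm512_load_ph`/`_mm512_loadu_ph`/`_mm512_store_ph`/`_mm512_storeu_ph` and -mavx512fp16; the aligned forms require 64-byte alignment, the `u` forms do not:

```
#include <immintrin.h>

void copy32(_Float16 *dst, const _Float16 *src) {
    _Alignas(64) _Float16 tmp[32];
    __m512h v = _mm512_loadu_ph(src);   /* any alignment          */
    _mm512_store_ph(tmp, v);            /* tmp is 64-byte aligned */
    _mm512_storeu_ph(dst, _mm512_load_ph(tmp));
}
```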
+ + + + + Move the lower half-precision (16-bit) floating-point element from "b" to the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := b.fp16[0] +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Move +
+ + + + + + + Move the lower half-precision (16-bit) floating-point element from "b" to the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := b.fp16[0] +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Move +
+ + + + + + Move the lower half-precision (16-bit) floating-point element from "b" to the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := b.fp16[0] +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Move +
+ + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +IF k[0] + dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +IF k[0] + dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +IF k[0] + dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] + +DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { + m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) + RETURN tmp.fp16 +} +IF k[0] + dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
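A sketch of the imm8 encoding used by RoundScaleFP16 above (fraction-bit count M in imm8[7:4], rounding control in imm8[3:0]), assuming the conventional intrinsic name `_mm512_roundscale_ph` and -mavx512fp16:

```
#include <immintrin.h>

/* Round every fp16 lane to 2 fraction bits (quarters), nearest-even:
 * imm8 = (M << 4) | rounding = (2 << 4) | _MM_FROUND_TO_NEAREST_INT. */
__m512h round_to_quarters(__m512h a) {
    return _mm512_roundscale_ph(a, (2 << 4) | _MM_FROUND_TO_NEAREST_INT);
}
```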
+ + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 31 + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. [sae_note] + FOR i := 0 to 31 + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. [sae_note] + FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. [sae_note] + FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ConvertExpFP16(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. [sae_note] + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. [sae_note] + IF k[0] + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. + IF k[0] + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. [sae_note] + IF k[0] + dst.fp16[0] := ConvertExpFP16(b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
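A plain-C model of ConvertExpFP16 via frexpf (float in place of fp16; the 0/Inf/NaN special cases are omitted):

```
#include <math.h>

/* floor(log2(|x|)) returned as a float, matching the getexp
 * pseudocode above for finite nonzero inputs. */
static float getexp_ref(float x) {
    int e;
    frexpf(x, &e);         /* |x| = m * 2^e with m in [0.5, 1) */
    return (float)(e - 1); /* so floor(log2(|x|)) == e - 1      */
}
/* getexp_ref(8.0f) == 3.0f, getexp_ref(0.7f) == -1.0f */
```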
+ + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 31 + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + FOR i := 0 TO 31 + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + FOR i := 0 TO 31 + IF k[i] + dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + IF k[0] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + IF k[0] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note] + IF k[0] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. + [getmant_note][sae_note] + IF k[0] + dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
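A plain-C model of GetNormalizedMantissaFP16 for the common norm = [1, 2), sign = source case (float in place of fp16; the other [getmant_note] interval and sign options are analogous):

```
#include <math.h>

/* Map x to +/- 2^k * |significand| with the significand in [1, 2). */
static float getmant_ref(float x) {
    int e;
    float m = frexpf(fabsf(x), &e); /* m in [0.5, 1)         */
    return copysignf(2.0f * m, x);  /* scaled into [1, 2)     */
}
/* getmant_ref(12.0f) == 1.5f, since 12 = 1.5 * 2^3 */
```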
+ + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] + +DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { + m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved + tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) + tmp[15:0] := src[15:0] - tmp[15:0] + IF IsInf(tmp[15:0]) + tmp[15:0] := FP16(0.0) + FI + RETURN tmp[15:0] +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 31 + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". + [round_note] + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 31 + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + DEFINE ScaleFP16(src1, src2) { + denormal1 := (src1.exp == 0) and (src1.fraction != 0) + denormal2 := (src2.exp == 0) and (src2.fraction != 0) + tmp1 := src1 + tmp2 := src2 + IF MXCSR.DAZ + IF denormal1 + tmp1 := 0 + FI + IF denormal2 + tmp2 := 0 + FI + FI + RETURN tmp1 * POW(2.0, FLOOR(tmp2)) +} +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Scale the lower half-precision (16-bit) floating-point element in "a" using the value from "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
DEFINE ScaleFP16(src1, src2) {
	denormal1 := (src1.exp == 0) and (src1.fraction != 0)
	denormal2 := (src2.exp == 0) and (src2.fraction != 0)
	tmp1 := src1
	tmp2 := src2
	IF MXCSR.DAZ
		IF denormal1
			tmp1 := 0
		FI
		IF denormal2
			tmp2 := 0
		FI
	FI
	RETURN tmp1 * POW(2.0, FLOOR(tmp2))
}
dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0
+ + + AVX512_FP16 +&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the lower half-precision (16-bit) floating-point element in "a" using the value from "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
[round_note] +
DEFINE ScaleFP16(src1, src2) {
	denormal1 := (src1.exp == 0) and (src1.fraction != 0)
	denormal2 := (src2.exp == 0) and (src2.fraction != 0)
	tmp1 := src1
	tmp2 := src2
	IF MXCSR.DAZ
		IF denormal1
			tmp1 := 0
		FI
		IF denormal2
			tmp2 := 0
		FI
	FI
	RETURN tmp1 * POW(2.0, FLOOR(tmp2))
}
dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0
+ + + AVX512_FP16 +&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the lower half-precision (16-bit) floating-point element in "a" using the value from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
DEFINE ScaleFP16(src1, src2) {
	denormal1 := (src1.exp == 0) and (src1.fraction != 0)
	denormal2 := (src2.exp == 0) and (src2.fraction != 0)
	tmp1 := src1
	tmp2 := src2
	IF MXCSR.DAZ
		IF denormal1
			tmp1 := 0
		FI
		IF denormal2
			tmp2 := 0
		FI
	FI
	RETURN tmp1 * POW(2.0, FLOOR(tmp2))
}
IF k[0]
	dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0])
ELSE
	dst.fp16[0] := src.fp16[0]
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0
+ + + AVX512_FP16 +&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + + Scale the lower half-precision (16-bit) floating-point element in "a" using the value from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
[round_note] +
DEFINE ScaleFP16(src1, src2) {
	denormal1 := (src1.exp == 0) and (src1.fraction != 0)
	denormal2 := (src2.exp == 0) and (src2.fraction != 0)
	tmp1 := src1
	tmp2 := src2
	IF MXCSR.DAZ
		IF denormal1
			tmp1 := 0
		FI
		IF denormal2
			tmp2 := 0
		FI
	FI
	RETURN tmp1 * POW(2.0, FLOOR(tmp2))
}
IF k[0]
	dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0])
ELSE
	dst.fp16[0] := src.fp16[0]
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0
+ + + AVX512_FP16 +&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + Scale the lower half-precision (16-bit) floating-point element in "a" using the value from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
DEFINE ScaleFP16(src1, src2) {
	denormal1 := (src1.exp == 0) and (src1.fraction != 0)
	denormal2 := (src2.exp == 0) and (src2.fraction != 0)
	tmp1 := src1
	tmp2 := src2
	IF MXCSR.DAZ
		IF denormal1
			tmp1 := 0
		FI
		IF denormal2
			tmp2 := 0
		FI
	FI
	RETURN tmp1 * POW(2.0, FLOOR(tmp2))
}
IF k[0]
	dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0])
ELSE
	dst.fp16[0] := 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0
+ + + AVX512_FP16 +&#13;
immintrin.h
+ Miscellaneous +
+ + + + + + + Scale the lower half-precision (16-bit) floating-point element in "a" using the value from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". +
[round_note] +
DEFINE ScaleFP16(src1, src2) {
	denormal1 := (src1.exp == 0) and (src1.fraction != 0)
	denormal2 := (src2.exp == 0) and (src2.fraction != 0)
	tmp1 := src1
	tmp2 := src2
	IF MXCSR.DAZ
		IF denormal1
			tmp1 := 0
		FI
		IF denormal2
			tmp2 := 0
		FI
	FI
	RETURN tmp1 * POW(2.0, FLOOR(tmp2))
}
IF k[0]
	dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0])
ELSE
	dst.fp16[0] := 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0
+ + + AVX512_FP16 +&#13;
immintrin.h
+ Miscellaneous +
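A minimal C usage sketch of the scale operation defined by the entries above, assuming the usual immintrin.h spellings for the 256-bit form (`_mm256_scalef_ph`, `_mm256_set1_ph`, `_mm256_storeu_ph`) and a toolchain with AVX512_FP16 and AVX512VL enabled (e.g. gcc/clang with -mavx512fp16 -mavx512vl):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        /* Each lane computes a * 2^FLOOR(b), per the ScaleFP16 pseudocode above. */
        __m256h a = _mm256_set1_ph((_Float16)3.0f);
        __m256h b = _mm256_set1_ph((_Float16)2.5f);  /* FLOOR(2.5) = 2 */
        __m256h r = _mm256_scalef_ph(a, b);          /* 3.0 * 2^2 = 12.0 per lane */

        _Float16 out[16];
        _mm256_storeu_ph(out, r);
        printf("%f\n", (float)out[0]);               /* prints 12.000000 */
        return 0;
    }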
+ + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". + [fpclass_note] + FOR i := 0 to 31 + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). + [fpclass_note] + FOR i := 0 to 31 + IF k1[i] + k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) + ELSE + k[i] := 0 + FI +ENDFOR +k[MAX:32] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Test the lower half-precision (16-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k". + [fpclass_note] + k[0] := CheckFPClass_FP16(a.fp16[0], imm8[7:0]) +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Test the lower half-precision (16-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). + [fpclass_note] + IF k1[0] + k[0] := CheckFPClass_FP16(a.fp16[0], imm8[7:0]) +ELSE + k[0] := 0 +FI +k[MAX:1] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
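A hedged sketch of how the category test above is typically driven from C, assuming the packed 512-bit entry corresponds to `_mm512_fpclass_ph_mask`. The imm8 constants are the standard fpclass category bits (0x01 QNaN, 0x08 +Inf, 0x10 -Inf, 0x80 SNaN) and must be compile-time constants:

    #include <immintrin.h>

    /* Returns a 32-lane mask marking the NaN and infinite fp16 lanes of v. */
    static inline __mmask32 nonfinite_lanes(__m512h v) {
        __mmask32 nans = _mm512_fpclass_ph_mask(v, 0x01 | 0x80); /* QNaN | SNaN */
        __mmask32 infs = _mm512_fpclass_ph_mask(v, 0x08 | 0x10); /* +Inf | -Inf */
        return nans | infs;
    }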
+ + + + + + Shuffle half-precision (16-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + off := idx[i+4:i] + dst.fp16[j] := idx[i+5] ? b.fp16[off] : a.fp16[off] +ENDFOR +dst[MAX:512] := 0 + + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + + Blend packed half-precision (16-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". + +FOR j := 0 to 31 + IF k[j] + dst.fp16[j] := b.fp16[j] + ELSE + dst.fp16[j] := a.fp16[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
+ + + + + Shuffle half-precision (16-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*16 + id := idx[i+4:i] + dst.fp16[j] := a.fp16[id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Miscellaneous +
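The lane-crossing fp16 shuffles above are the building block for arbitrary reorderings. A small sketch, assuming the single-source entry is `_mm512_permutexvar_ph` (index operand first), that reverses all 32 half-precision lanes:

    #include <immintrin.h>

    /* Output lane j takes input lane 31-j; indices are the 16-bit values
       consumed by the shuffle pseudocode above (only the low 5 bits matter). */
    static inline __m512h reverse_ph_lanes(__m512h v) {
        short idx[32];
        for (int j = 0; j < 32; ++j)
            idx[j] = (short)(31 - j);
        return _mm512_permutexvar_ph(_mm512_loadu_si512(idx), v);
    }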
+ + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +dst.fp16[0] := (1.0 / SQRT(b.fp16[0])) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +IF k[0] + dst.fp16[0] := (1.0 / SQRT(b.fp16[0])) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +IF k[0] + dst.fp16[0] := (1.0 / SQRT(b.fp16[0])) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
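A sketch of the typical use of the approximation above, assuming the packed entry is `_mm512_rsqrt_ph`. Since fp16 has an 11-bit significand, the stated 1.5*2^-12 error bound is already close to full half precision, so the raw approximation is often used without refinement:

    #include <immintrin.h>

    /* Approximate x / sqrt(s) across 32 fp16 lanes, e.g. for normalization. */
    static inline __m512h scale_by_rsqrt(__m512h x, __m512h s) {
        return _mm512_mul_ph(x, _mm512_rsqrt_ph(s));
    }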
+ + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + +FOR i := 0 to 31 + dst.fp16[i] := SQRT(a.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". + [round_note] + +FOR i := 0 to 31 + dst.fp16[i] := SQRT(a.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + [round_note] + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + [round_note] + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := SQRT(a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +dst.fp16[0] := SQRT(b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst.fp16[0] := SQRT(b.fp16[0]) +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := SQRT(b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := SQRT(b.fp16[0]) +ELSE + dst.fp16[0] := src.fp16[0] +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + +IF k[0] + dst.fp16[0] := SQRT(b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". + [round_note] + +IF k[0] + dst.fp16[0] := SQRT(b.fp16[0]) +ELSE + dst.fp16[0] := 0 +FI +dst[127:16] := a[127:16] +dst[MAX:128] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
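For the [round_note] variants above, the rounding behavior is selected per call rather than via MXCSR. A sketch assuming the rounding form is `_mm512_sqrt_round_ph`; the rounding argument must be a compile-time constant, and suppressed exceptions (`_MM_FROUND_NO_EXC`) are bundled with the chosen direction:

    #include <immintrin.h>

    /* Square root of 32 fp16 lanes, rounded toward zero, no FP exceptions. */
    static inline __m512h sqrt_rtz(__m512h x) {
        return _mm512_sqrt_round_ph(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    }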
+ + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + dst.fp16[i] := (1.0 / a.fp16[i]) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := src.fp16[i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR i := 0 to 31 + IF k[i] + dst.fp16[i] := (1.0 / a.fp16[i]) + ELSE + dst.fp16[i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_FP16 +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. +
dst.fp16[0] := (1.0 / b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0
+ + AVX512_FP16 +&#13;
immintrin.h
+ Elementary Math Functions +
+ + + + + + + Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. +
IF k[0]
	dst.fp16[0] := (1.0 / b.fp16[0])
ELSE
	dst.fp16[0] := src.fp16[0]
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0
+ + AVX512_FP16 +&#13;
immintrin.h
+ Elementary Math Functions +
+ + + + + + Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. +
IF k[0]
	dst.fp16[0] := (1.0 / b.fp16[0])
ELSE
	dst.fp16[0] := 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0
+ + AVX512_FP16 +&#13;
immintrin.h
+ Elementary Math Functions +
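Where the ~1.5*2^-12 reciprocal approximation above is not accurate enough, one Newton-Raphson step, r' = r * (2 - x*r), roughly squares the accuracy while staying much cheaper than a full division. A sketch assuming the packed entries are `_mm512_rcp_ph` and the FMA form `_mm512_fnmadd_ph` (which computes -(a*b)+c):

    #include <immintrin.h>

    static inline __m512h refined_rcp(__m512h x) {
        __m512h r   = _mm512_rcp_ph(x);
        __m512h two = _mm512_set1_ph((_Float16)2.0f);
        return _mm512_mul_ph(r, _mm512_fnmadd_ph(x, r, two)); /* r * (2 - x*r) */
    }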
+ + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values. + +dst.fp16[0] := e0 +dst.fp16[1] := e1 +dst.fp16[2] := e2 +dst.fp16[3] := e3 +dst.fp16[4] := e4 +dst.fp16[5] := e5 +dst.fp16[6] := e6 +dst.fp16[7] := e7 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values. + +dst.fp16[0] := e0 +dst.fp16[1] := e1 +dst.fp16[2] := e2 +dst.fp16[3] := e3 +dst.fp16[4] := e4 +dst.fp16[5] := e5 +dst.fp16[6] := e6 +dst.fp16[7] := e7 +dst.fp16[8] := e8 +dst.fp16[9] := e9 +dst.fp16[10] := e10 +dst.fp16[11] := e11 +dst.fp16[12] := e12 +dst.fp16[13] := e13 +dst.fp16[14] := e14 +dst.fp16[15] := e15 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values. + +dst.fp16[0] := e0 +dst.fp16[1] := e1 +dst.fp16[2] := e2 +dst.fp16[3] := e3 +dst.fp16[4] := e4 +dst.fp16[5] := e5 +dst.fp16[6] := e6 +dst.fp16[7] := e7 +dst.fp16[8] := e8 +dst.fp16[9] := e9 +dst.fp16[10] := e10 +dst.fp16[11] := e11 +dst.fp16[12] := e12 +dst.fp16[13] := e13 +dst.fp16[14] := e14 +dst.fp16[15] := e15 +dst.fp16[16] := e16 +dst.fp16[17] := e17 +dst.fp16[18] := e18 +dst.fp16[19] := e19 +dst.fp16[20] := e20 +dst.fp16[21] := e21 +dst.fp16[22] := e22 +dst.fp16[23] := e23 +dst.fp16[24] := e24 +dst.fp16[25] := e25 +dst.fp16[26] := e26 +dst.fp16[27] := e27 +dst.fp16[28] := e28 +dst.fp16[29] := e29 +dst.fp16[30] := e30 +dst.fp16[31] := e31 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst.fp16[0] := e7 +dst.fp16[1] := e6 +dst.fp16[2] := e5 +dst.fp16[3] := e4 +dst.fp16[4] := e3 +dst.fp16[5] := e2 +dst.fp16[6] := e1 +dst.fp16[7] := e0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst.fp16[0] := e15 +dst.fp16[1] := e14 +dst.fp16[2] := e13 +dst.fp16[3] := e12 +dst.fp16[4] := e11 +dst.fp16[5] := e10 +dst.fp16[6] := e9 +dst.fp16[7] := e8 +dst.fp16[8] := e7 +dst.fp16[9] := e6 +dst.fp16[10] := e5 +dst.fp16[11] := e4 +dst.fp16[12] := e3 +dst.fp16[13] := e2 +dst.fp16[14] := e1 +dst.fp16[15] := e0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst.fp16[0] := e31 +dst.fp16[1] := e30 +dst.fp16[2] := e29 +dst.fp16[3] := e28 +dst.fp16[4] := e27 +dst.fp16[5] := e26 +dst.fp16[6] := e25 +dst.fp16[7] := e24 +dst.fp16[8] := e23 +dst.fp16[9] := e22 +dst.fp16[10] := e21 +dst.fp16[11] := e20 +dst.fp16[12] := e19 +dst.fp16[13] := e18 +dst.fp16[14] := e17 +dst.fp16[15] := e16 +dst.fp16[16] := e15 +dst.fp16[17] := e14 +dst.fp16[18] := e13 +dst.fp16[19] := e12 +dst.fp16[20] := e11 +dst.fp16[21] := e10 +dst.fp16[22] := e9 +dst.fp16[23] := e8 +dst.fp16[24] := e7 +dst.fp16[25] := e6 +dst.fp16[26] := e5 +dst.fp16[27] := e4 +dst.fp16[28] := e3 +dst.fp16[29] := e2 +dst.fp16[30] := e1 +dst.fp16[31] := e0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) floating-point value "a" to all elements of "dst". + +FOR i := 0 to 7 + dst.fp16[i] := a[15:0] +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) floating-point value "a" to all elements of "dst". + +FOR i := 0 to 15 + dst.fp16[i] := a[15:0] +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) floating-point value "a" to all elements of "dst". + +FOR i := 0 to 31 + dst.fp16[i] := a[15:0] +ENDFOR +dst[MAX:512] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) complex floating-point value "a" to all elements of "dst". + +FOR i := 0 to 3 + dst.fp16[2*i+0] := a[15:0] + dst.fp16[2*i+1] := a[31:16] +ENDFOR +dst[MAX:128] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) complex floating-point value "a" to all elements of "dst". + +FOR i := 0 to 7 + dst.fp16[2*i+0] := a[15:0] + dst.fp16[2*i+1] := a[31:16] +ENDFOR +dst[MAX:256] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Broadcast half-precision (16-bit) complex floating-point value "a" to all elements of "dst". + +FOR i := 0 to 15 + dst.fp16[2*i+0] := a[15:0] + dst.fp16[2*i+1] := a[31:16] +ENDFOR +dst[MAX:512] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Copy half-precision (16-bit) floating-point element "a" to the lower element of "dst", and zero the upper 7 elements. + +dst.fp16[0] := a[15:0] +dst[127:16] := 0 + + AVX512_FP16 +
immintrin.h
+ Set +
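Argument order is the usual gotcha with the set entries above: the plain form lists elements from the highest lane down (its last argument lands in lane 0, per dst.fp16[0] := e0), while the reverse-order form lists them in lane order. A sketch assuming the 128-bit names `_mm_set_ph` and `_mm_setr_ph`:

    #include <immintrin.h>

    /* Both vectors hold 1,2,...,8 in lanes 0..7. */
    static inline void set_order_demo(void) {
        __m128h hi_first = _mm_set_ph ((_Float16)8, (_Float16)7, (_Float16)6, (_Float16)5,
                                       (_Float16)4, (_Float16)3, (_Float16)2, (_Float16)1);
        __m128h lo_first = _mm_setr_ph((_Float16)1, (_Float16)2, (_Float16)3, (_Float16)4,
                                       (_Float16)5, (_Float16)6, (_Float16)7, (_Float16)8);
        (void)hi_first; (void)lo_first;
    }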
+ + + Return vector of type __m512h with all elements set to zero. + +dst[MAX:0] := 0 + + + AVX512_FP16 +
immintrin.h
+ Set +
+ + + + Cast vector of type "__m128h" to type "__m128". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m256". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512h" to type "__m512". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m128d". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m256d". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512h" to type "__m512d". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m128i". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m256i". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512h" to type "__m512i". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128" to type "__m128h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256" to type "__m256h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512" to type "__m512h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128d" to type "__m128h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256d" to type "__m256h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512d" to type "__m512h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128i" to type "__m128h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256i" to type "__m256h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512i" to type "__m512h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m128h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512h" to type "__m128h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m512h" to type "__m256h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m256h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m512h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m512h". This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m256h"; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + AVX512_FP16 +
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m128h" to type "__m512h"; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
 AVX512_FP16 +&#13;
immintrin.h
+ Cast +
+ + + + Cast vector of type "__m256h" to type "__m512h"; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
 AVX512_FP16 +&#13;
immintrin.h
+ Cast +
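Because the casts above are free reinterpretations, they are the idiomatic bridge for doing integer bit tricks on fp16 data. A sketch, assuming the 512-bit names `_mm512_castph_si512` / `_mm512_castsi512_ph`, that negates every lane by flipping the sign bit in the integer domain:

    #include <immintrin.h>

    /* Negate all 32 fp16 lanes; the casts compile to nothing per the entries above. */
    static inline __m512h negate_ph(__m512h v) {
        __m512i bits = _mm512_castph_si512(v);
        bits = _mm512_xor_si512(bits, _mm512_set1_epi16((short)0x8000));
        return _mm512_castsi512_ph(bits);
    }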
+ + + Return vector of type __m512h with undefined elements. + AVX512_FP16 +
immintrin.h
+ General Support +
+ + + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst". + +FOR i := 0 to 3 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 3 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8] + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 3 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst". + +FOR i := 0 to 1 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 1 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8] + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
+ + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 1 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Bit Manipulation +
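A hedged sketch of the byte-gather operation above (VPMULTISHIFTQB-style): each destination byte is an 8-bit field extracted at an arbitrary bit offset within the matching 64-bit source element, which makes it handy for unpacking bit-packed records. The 256-bit name `_mm256_multishift_epi64_epi8` and the 6-bit-field layout are assumptions for illustration:

    #include <immintrin.h>

    /* Extract four 6-bit fields from the low 24 bits of each 64-bit element of
       data into bytes 0..3 (bit offsets 0, 6, 12, 18); bytes 4..7 are don't-care.
       Control byte j holds the bit offset for destination byte j. */
    static inline __m256i unpack_6bit_fields(__m256i data) {
        const __m256i ctrl = _mm256_set1_epi64x(0x00000000120C0600ULL);
        __m256i bytes = _mm256_multishift_epi64_epi8(ctrl, data);
        /* Each gathered byte still carries 8 bits; mask down to the 6-bit payload. */
        return _mm256_and_si256(bytes, _mm256_set1_epi8(0x3F));
    }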
+ + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + id := idx[i+4:i]*8 + dst[i+7:i] := a[id+7:id] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + id := idx[i+4:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + id := idx[i+4:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Shuffle 8-bit integers in "a" using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + id := idx[i+3:i]*8 + dst[i+7:i] := a[id+7:id] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + id := idx[i+3:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + id := idx[i+3:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 31 + i := j*8 + off := 8*idx[i+4:i] + dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + off := 8*idx[i+4:i] + dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). +
FOR j := 0 to 31
	i := j*8
	IF k[j]
		off := 8*idx[i+4:i]
		dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off]
	ELSE
		dst[i+7:i] := idx[i+7:i]
	FI
ENDFOR
dst[MAX:256] := 0
+ + AVX512_VBMI + AVX512VL +&#13;
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*8 + IF k[j] + off := 8*idx[i+4:i] + dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + off := 8*idx[i+3:i] + dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + off := 8*idx[i+3:i] + dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). +
FOR j := 0 to 15
	i := j*8
	IF k[j]
		off := 8*idx[i+3:i]
		dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off]
	ELSE
		dst[i+7:i] := idx[i+7:i]
	FI
ENDFOR
dst[MAX:128] := 0
+ + AVX512_VBMI + AVX512VL +&#13;
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*8 + IF k[j] + off := 8*idx[i+3:i] + dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + + AVX512_VBMI + AVX512VL +
immintrin.h
+ Swizzle +
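The two-source byte shuffles above amount to a lookup into a table twice the register width: the selector bit picks the source, the index bits pick the byte. A sketch assuming the 128-bit name `_mm_permutex2var_epi8`:

    #include <immintrin.h>

    /* 32-byte table lookup: each index byte selects from the concatenation of
       lo (selector bit 4 = 0) and hi (selector bit 4 = 1), per the pseudocode above. */
    static inline __m128i lut32_lookup(__m128i lo, __m128i hi, __m128i idx) {
        return _mm_permutex2var_epi8(lo, idx, hi);
    }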
+ + + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst". + +FOR i := 0 to 7 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Bit Manipulation +
+ + + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR i := 0 to 7 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8] + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Bit Manipulation +
+ + + + + + For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR i := 0 to 7 + q := i * 64 + FOR j := 0 to 7 + tmp8 := 0 + ctrl := a[q+j*8+7:q+j*8] & 63 + FOR l := 0 to 7 + tmp8[l] := b[q+((ctrl+l) & 63)] + ENDFOR + IF k[i*8+j] + dst[q+j*8+7:q+j*8] := tmp8[7:0] + ELSE + dst[q+j*8+7:q+j*8] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Bit Manipulation +
+ + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + id := idx[i+5:i]*8 + dst[i+7:i] := a[id+7:id] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + id := idx[i+5:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + id := idx[i+5:i]*8 + IF k[j] + dst[i+7:i] := a[id+7:id] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". + +FOR j := 0 to 63 + i := j*8 + off := 8*idx[i+5:i] + dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + off := 8*idx[i+5:i] + dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). +
FOR j := 0 to 63
	i := j*8
	IF k[j]
		off := 8*idx[i+5:i]
		dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off]
	ELSE
		dst[i+7:i] := idx[i+7:i]
	FI
ENDFOR
dst[MAX:512] := 0
+ + AVX512_VBMI +&#13;
immintrin.h
+ Swizzle +
+ + + + + + + Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 63 + i := j*8 + IF k[j] + off := 8*idx[i+5:i] + dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + + AVX512_VBMI +
immintrin.h
+ Swizzle +
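Unlike the older per-128-bit-lane byte shuffle (`_mm512_shuffle_epi8`), the single-source entry above indexes all 64 bytes of the register, so a full 64-entry byte LUT fits in one vector. A sketch assuming the name `_mm512_permutexvar_epi8` (index operand first); only the low 6 bits of each index byte are used, per the pseudocode:

    #include <immintrin.h>

    /* Translate every byte of v through a 64-entry table held in one register. */
    static inline __m512i lut64_translate(__m512i table, __m512i v) {
        return _mm512_permutexvar_epi8(v, table);
    }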
+ + + + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
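One direct consequence of the double-shift pseudocode above: passing the same vector as both halves of the concatenation turns the right-shift into a per-lane variable rotate, since ((a:a) >> n) keeps the bits that wrap around. A sketch assuming the 256-bit name `_mm256_shrdv_epi64`:

    #include <immintrin.h>

    /* Rotate each 64-bit lane of a right by the matching count in counts
       (counts are taken modulo 64, per the "& 63" in the pseudocode). */
    static inline __m256i rotr64_var(__m256i a, __m256i counts) {
        return _mm256_shrdv_epi64(a, a, counts);
    }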
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
FOR j := 0 to 3
	i := j*64
	IF k[j]
		dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0]
	ELSE
		dst[i+63:i] := src[i+63:i]
	FI
ENDFOR
dst[MAX:256] := 0
+ + AVX512_VBMI2 + AVX512VL +&#13;
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). +
FOR j := 0 to 1
	i := j*64
	IF k[j]
		dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0]
	ELSE
		dst[i+63:i] := src[i+63:i]
	FI
ENDFOR
dst[MAX:128] := 0
+ + AVX512_VBMI2 + AVX512VL +&#13;
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst". + +FOR j := 0 to 15 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
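The variable-count forms above take the shift amount per element from "c" (masked to the element width minus one), so every lane can use a different count. A sketch of the unmasked 128-bit, 64-bit-element form, assuming it corresponds to _mm_shldv_epi64 in immintrin.h:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* Each 64-bit lane computes the high half of ((a:b) << (c & 63)),
       i.e. (a << c) | (b >> (64 - c)) for non-zero counts. */
    __m128i a = _mm_set_epi64x(1, (int64_t)0x00000000000000FFULL);
    __m128i b = _mm_set_epi64x((int64_t)0x8000000000000000ULL,
                               (int64_t)0xF000000000000000ULL);
    __m128i c = _mm_set_epi64x(1, 4); /* independent count per lane */

    __m128i r = _mm_shldv_epi64(a, b, c);

    uint64_t out[2];
    _mm_storeu_si128((__m128i *)out, r);
    printf("lane0 = %016llx\n", (unsigned long long)out[0]); /* 0000000000000fff */
    printf("lane1 = %016llx\n", (unsigned long long)out[1]); /* 0000000000000003 */
    return 0;
}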
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst". + +FOR j := 0 to 7 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst". + +FOR j := 0 to 3 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst". + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 1 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst". + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
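Because the two source operands may alias, the double-shift-left-by-immediate doubles as a per-lane rotate: passing the same register for "a" and "b" yields a rotate-left by imm8, since (a << n) | (a >> (64 - n)) is exactly rotl(a, n). A sketch, assuming the unmasked 128-bit form above corresponds to _mm_shldi_epi64 in immintrin.h:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set_epi64x((int64_t)0x8000000000000001ULL, 1);

    /* Per lane: (a << 8) | (a >> 56) == rotate-left by 8. */
    __m128i r = _mm_shldi_epi64(a, a, 8);

    uint64_t out[2];
    _mm_storeu_si128((__m128i *)out, r);
    printf("lane0 = %016llx\n", (unsigned long long)out[0]); /* 0000000000000100 */
    printf("lane1 = %016llx\n", (unsigned long long)out[1]); /* 0000000000000180 */
    return 0;
}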
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst". + +FOR j := 0 to 7 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst". + +FOR j := 0 to 3 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst". + +FOR j := 0 to 15 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Shift +
+ + Swizzle + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Load +
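Note that these expand loads read only popcount(k) contiguous elements from memory and scatter them into the active lanes, so the length of the memory access depends on the mask. A sketch of the zero-masked 128-bit, 16-bit-element form, assuming it corresponds to _mm_maskz_expandloadu_epi16 in immintrin.h:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* Only three elements are read, because k has three bits set. */
    uint16_t buf[3] = {11, 22, 33};
    __mmask8 k = 0xA2; /* lanes 1, 5, 7 active */

    __m128i r = _mm_maskz_expandloadu_epi16(k, buf);

    uint16_t out[8];
    _mm_storeu_si128((__m128i *)out, r);
    for (int j = 0; j < 8; j++)
        printf("%u ", (unsigned)out[j]); /* 0 11 0 0 0 22 0 33 */
    printf("\n");
    return 0;
}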
+ + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
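The register expand forms are the same scatter without the memory access: the low popcount(k) elements of "a" are consumed in order and distributed to the active lanes. A sketch of the zero-masked 128-bit, 8-bit-element form, assuming it corresponds to _mm_maskz_expand_epi8 in immintrin.h:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* Source elements are consumed from the low end of "a" in order. */
    __m128i a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8,
                              9, 10, 11, 12, 13, 14, 15, 16);
    __mmask16 k = 0x0013; /* lanes 0, 1, 4 active */

    __m128i r = _mm_maskz_expand_epi8(k, a);

    uint8_t out[16];
    _mm_storeu_si128((__m128i *)out, r);
    for (int j = 0; j < 16; j++)
        printf("%u ", (unsigned)out[j]); /* 1 2 0 0 3 0 0 0 0 0 0 0 0 0 0 0 */
    printf("\n");
    return 0;
}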
+ + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 16 +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 16 +m := 0 +FOR j := 0 to 15 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 16 +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 16 +m := 0 +FOR j := 0 to 7 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 8 +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[255:m] := 0 +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 8 +m := 0 +FOR j := 0 to 31 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[255:m] := src[255:m] +dst[MAX:256] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 8 +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[127:m] := 0 +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 8 +m := 0 +FOR j := 0 to 15 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[127:m] := src[127:m] +dst[MAX:128] := 0 + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Swizzle +
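Compress is the inverse of expand: the active lanes of "a" are packed to the low end of "dst" in lane order. A sketch of the zero-masked 128-bit, 16-bit-element form, assuming it corresponds to _mm_maskz_compress_epi16 in immintrin.h:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi16(10, 20, 30, 40, 50, 60, 70, 80);
    __mmask8 k = 0xA2; /* keep lanes 1, 5, 7 */

    /* Active elements move to the front; the tail is zeroed. */
    __m128i r = _mm_maskz_compress_epi16(k, a);

    uint16_t out[8];
    _mm_storeu_si128((__m128i *)out, r);
    for (int j = 0; j < 8; j++)
        printf("%u ", (unsigned)out[j]); /* 20 60 80 0 0 0 0 0 */
    printf("\n");
    return 0;
}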
+ + Swizzle + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 16 +m := base_addr +FOR j := 0 to 15 + i := j*16 + IF k[j] + MEM[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 16 +m := base_addr +FOR j := 0 to 7 + i := j*16 + IF k[j] + MEM[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 8 +m := base_addr +FOR j := 0 to 31 + i := j*8 + IF k[j] + MEM[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 8 +m := base_addr +FOR j := 0 to 15 + i := j*8 + IF k[j] + MEM[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 + AVX512VL +
immintrin.h
+ Store +
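A common use of compress-store is stream compaction: keep only the elements that satisfy a predicate and write them contiguously. The sketch below assumes the 128-bit, 16-bit-element store above corresponds to _mm_mask_compressstoreu_epi16; the filter_ge_50 helper is hypothetical, the compare-mask intrinsic additionally requires AVX512BW (present on every CPU with VBMI2), and __builtin_popcount is a GCC/Clang builtin:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: copy the 16-bit values of "a" that are >= 50
   contiguously into *out, returning how many were written. */
static int filter_ge_50(__m128i a, uint16_t *out) {
    __mmask8 k = _mm_cmpge_epu16_mask(a, _mm_set1_epi16(50));
    _mm_mask_compressstoreu_epi16(out, k, a);
    return __builtin_popcount((unsigned)k);
}

int main(void) {
    __m128i a = _mm_setr_epi16(10, 70, 30, 90, 50, 20, 80, 40);
    uint16_t out[8];
    int n = filter_ge_50(a, out);
    for (int j = 0; j < n; j++)
        printf("%u ", (unsigned)out[j]); /* 70 90 50 80 */
    printf("\n");
    return 0;
}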
+ + + + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
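The 512-bit variable forms follow the same per-lane recipe across eight 64-bit lanes, and the aliasing trick works here too: with "b" equal to "a", the low half of ((a:a) >> c) is a rotate-right by c with an independent count per lane. A sketch, assuming the unmasked entry above corresponds to _mm512_shrdv_epi64 in immintrin.h (build with, e.g., gcc -mavx512vbmi2):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi64(1);
    __m512i c = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);

    /* Per lane: rotate-right of 1 by the lane's count. */
    __m512i r = _mm512_shrdv_epi64(a, a, c);

    uint64_t out[8];
    _mm512_storeu_si512(out, r);
    for (int j = 0; j < 8; j++)
        printf("lane%d = %016llx\n", j, (unsigned long long)out[j]);
    /* lane0 = 0000000000000001, lane1 = 8000000000000000,
       lane2 = 4000000000000000, and so on. */
    return 0;
}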
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst". + +FOR j := 0 to 7 + i := j*64 + dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst". + +FOR j := 0 to 15 + i := j*32 + dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst". + +FOR j := 0 to 31 + i := j*16 + dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst". + +FOR j := 0 to 15 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst". + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + i := j*64 + IF k[j] + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] + ELSE + dst[i+63:i] := src[i+63:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst". + +FOR j := 0 to 7 + i := j*64 + tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] + dst[i+63:i] := tmp[127:64] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + i := j*32 + IF k[j] + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] + ELSE + dst[i+31:i] := src[i+31:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst". + +FOR j := 0 to 15 + i := j*32 + tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] + dst[i+31:i] := tmp[63:32] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 31 + i := j*16 + IF k[j] + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + + + + + Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst". + +FOR j := 0 to 31 + i := j*16 + tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] + dst[i+15:i] := tmp[31:16] +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Shift +
+ + Swizzle + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Load +
+ + Swizzle + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Load +
+ + Swizzle + + + + + Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Load +
+ + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[i+15:i] := a[m+15:m] + m := m + 16 + ELSE + dst[i+15:i] := src[i+15:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + + Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[i+7:i] := a[m+7:m] + m := m + 8 + ELSE + dst[i+7:i] := src[i+7:i] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 16 +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 16 +m := 0 +FOR j := 0 to 31 + i := j*16 + IF k[j] + dst[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. + +size := 8 +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[511:m] := 0 +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". + +size := 8 +m := 0 +FOR j := 0 to 63 + i := j*8 + IF k[j] + dst[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR +dst[511:m] := src[511:m] +dst[MAX:512] := 0 + + + AVX512_VBMI2 +
immintrin.h
+ Swizzle +
+ + Swizzle + + + + + Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 16 +m := base_addr +FOR j := 0 to 31 + i := j*16 + IF k[j] + MEM[m+size-1:m] := a[i+15:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 +
immintrin.h
+ Store +
+ + Swizzle + + + + + Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". + +size := 8 +m := base_addr +FOR j := 0 to 63 + i := j*8 + IF k[j] + MEM[m+size-1:m] := a[i+7:i] + m := m + size + FI +ENDFOR + + + AVX512_VBMI2 +
immintrin.h
+ Store +
+ + + + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
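All six saturating word dot-product entries above compute the same per-lane step and differ only in vector width and masking. A scalar C model of one 32-bit lane is sketched below; the helper names are illustrative, not intrinsics.

#include <stdint.h>

/* Saturate a 64-bit sum to the signed 32-bit range (Saturate32). */
static int32_t sat32(int64_t x)
{
    if (x > INT32_MAX) return INT32_MAX;
    if (x < INT32_MIN) return INT32_MIN;
    return (int32_t)x;
}

/* One lane of the saturating word dot product: two s16*s16 products
   added to the accumulator with signed 32-bit saturation. */
static int32_t dpwssds_lane(int32_t src, const int16_t a[2], const int16_t b[2])
{
    int64_t t1 = (int64_t)a[0] * b[0];   /* SignExtend32 * SignExtend32 */
    int64_t t2 = (int64_t)a[1] * b[1];
    return sat32((int64_t)src + t1 + t2);
}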
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 7 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 3 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + AVX512_VNNI + AVX512VL +
immintrin.h
+ Arithmetic +
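The byte dot-product entries above differ only in width, masking, and whether Saturate32 is applied at the end; one 32-bit lane can be modeled in scalar C as below (illustrative helper, not an intrinsic). Each u8*s8 product fits in a signed 16-bit value, so summing in a wider type is equivalent to the pseudocode.

#include <stdint.h>

/* One lane of the u8*s8 byte dot product: four products summed into the
   accumulator, wrapping or with signed 32-bit saturation. */
static int32_t dpbusd_lane(int32_t src, const uint8_t a[4], const int8_t b[4],
                           int saturate)
{
    int64_t acc = src;
    for (int i = 0; i < 4; i++)
        acc += (int32_t)a[i] * (int32_t)b[i];      /* ZeroExtend * SignExtend */
    if (!saturate)
        return (int32_t)(uint32_t)acc;             /* wrapping variant */
    if (acc > INT32_MAX) return INT32_MAX;         /* saturating variant */
    if (acc < INT32_MIN) return INT32_MIN;
    return (int32_t)acc;
}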
+ + + + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 15 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 15 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
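For the 512-bit word dot products above, a hedged usage sketch: the name _mm512_dpwssd_epi32 is assumed from the AVX512_VNNI feature, as intrinsic names are not preserved in this excerpt. It accumulates the pairwise signed 16-bit products of two 32-element vectors into 16 signed 32-bit lanes.

#include <stdint.h>
#include <immintrin.h>

/* Accumulate 2-wide s16 dot products of a and b into acc's s32 lanes. */
static __m512i accumulate_s16_dot(__m512i acc, const int16_t a[32], const int16_t b[32])
{
    __m512i va = _mm512_loadu_si512(a);
    __m512i vb = _mm512_loadu_si512(b);
    return _mm512_dpwssd_epi32(acc, va, vb);   /* assumed name, see above */
}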
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 15 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +FOR j := 0 to 15 + IF k[j] + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 + ELSE + dst.dword[j] := src.dword[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 15 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:512] := 0 + + + AVX512_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + + + + Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+15:k1] := 0 +MEM[k2+15:k2] := 0 +FOR i := 0 TO 15 + FOR j := 0 TO 15 + match := (a.dword[i] == b.dword[j] ? 1 : 0) + MEM[k1+15:k1].bit[i] |= match + MEM[k2+15:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512F +
immintrin.h
+ Mask +
+ + + + + + + Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+7:k1] := 0 +MEM[k2+7:k2] := 0 +FOR i := 0 TO 7 + FOR j := 0 TO 7 + match := (a.qword[i] == b.qword[j] ? 1 : 0) + MEM[k1+7:k1].bit[i] |= match + MEM[k2+7:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512F +
immintrin.h
+ Mask +
+ + + + + + + + + Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+7:k1] := 0 +MEM[k2+7:k2] := 0 +FOR i := 0 TO 3 + FOR j := 0 TO 3 + match := (a.dword[i] == b.dword[j] ? 1 : 0) + MEM[k1+7:k1].bit[i] |= match + MEM[k2+7:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512VL +
immintrin.h
+ Mask +
+ + + + + + + Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+7:k1] := 0 +MEM[k2+7:k2] := 0 +FOR i := 0 TO 7 + FOR j := 0 TO 7 + match := (a.dword[i] == b.dword[j] ? 1 : 0) + MEM[k1+7:k1].bit[i] |= match + MEM[k2+7:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512VL +
immintrin.h
+ Mask +
+ + + + + + + Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+7:k1] := 0 +MEM[k2+7:k2] := 0 +FOR i := 0 TO 1 + FOR j := 0 TO 1 + match := (a.qword[i] == b.qword[j] ? 1 : 0) + MEM[k1+7:k1].bit[i] |= match + MEM[k2+7:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512VL +
immintrin.h
+ Mask +
+ + + + + + + Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. + +MEM[k1+7:k1] := 0 +MEM[k2+7:k2] := 0 +FOR i := 0 TO 3 + FOR j := 0 TO 3 + match := (a.qword[i] == b.qword[j] ? 1 : 0) + MEM[k1+7:k1].bit[i] |= match + MEM[k2+7:k2].bit[j] |= match + ENDFOR +ENDFOR + + + AVX512_VP2INTERSECT + AVX512VL +
immintrin.h
+ Mask +
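The VP2INTERSECT entries above all follow the same all-pairs comparison; a scalar C model of the 512-bit 32-bit-element form is sketched below (illustrative helper, not an intrinsic). Every element of "a" that equals any element of "b" sets its bit in k1, and symmetrically for k2.

#include <stdint.h>

/* Scalar model of the 16-element 32-bit intersection above. */
static void intersect_epi32(const int32_t a[16], const int32_t b[16],
                            uint16_t *k1, uint16_t *k2)
{
    *k1 = 0;
    *k2 = 0;
    for (int i = 0; i < 16; i++)
        for (int j = 0; j < 16; j++)
            if (a[i] == b[j]) {
                *k1 |= (uint16_t)(1u << i);   /* a[i] has a match in b */
                *k2 |= (uint16_t)(1u << j);   /* b[j] has a match in a */
            }
}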
+ + + + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 3 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
+ + + Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". + + +FOR j := 0 to 1 + i := j*64 + tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) + dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_IFMA +
immintrin.h
+ Arithmetic +
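Each AVX_IFMA entry above performs the same per-lane 52-bit multiply-add: the low 52 bits of the two multiplicands form a 104-bit product, and either its low or its high 52 bits are added to the accumulator. A scalar C model using the unsigned __int128 extension (GCC/Clang) is sketched below, with illustrative helper names.

#include <stdint.h>

#define MASK52 ((1ULL << 52) - 1)

/* x + tmp[51:0], where tmp = (y & MASK52) * (z & MASK52). */
static uint64_t madd52lo(uint64_t x, uint64_t y, uint64_t z)
{
    unsigned __int128 t = (unsigned __int128)(y & MASK52) * (z & MASK52);
    return x + ((uint64_t)t & MASK52);
}

/* x + tmp[103:52]; the shifted value already fits in 52 bits. */
static uint64_t madd52hi(uint64_t x, uint64_t y, uint64_t z)
{
    unsigned __int128 t = (unsigned __int128)(y & MASK52) * (z & MASK52);
    return x + (uint64_t)(t >> 52);
}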
+ + + + Convert scalar BF16 (16-bit) floating-point element stored at memory location "__A" to a single-precision (32-bit) floating-point element, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +b := Convert_BF16_To_FP32(MEM[__A+15:__A])
+FOR j := 0 to 7
+ m := j*32
+ dst[m+31:m] := b
+ENDFOR
+dst[MAX:256] := 0 + + + AVX_NE_CONVERT +

immintrin.h
+ Convert +
+ + + Convert scalar half-precision (16-bit) floating-point element stored at memory location "__A" to a single-precision (32-bit) floating-point element, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +b := Convert_FP16_To_FP32(MEM[__A+15:__A])
+FOR j := 0 to 7
+ m := j*32
+ dst[m+31:m] := b
+ENDFOR
+dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + m := j*32 + dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+15:__A+m]) +ENDFOR +dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + m := j*32 + dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+15:__A+m]) +ENDFOR +dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + m := j*32 + dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+31:__A+m+16]) +ENDFOR +dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + m := j*32 + dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+31:__A+m+16]) +ENDFOR +dst[MAX:256] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert scalar BF16 (16-bit) floating-point element stored at memory location "__A" to a single-precision (32-bit) floating-point element, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +b := Convert_BF16_To_FP32(MEM[__A+15:__A])
+FOR j := 0 to 3
+ m := j*32
+ dst[m+31:m] := b
+ENDFOR
+dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert scalar half-precision (16-bit) floating-point element stored at memory location "__A" to a single-precision (32-bit) floating-point element, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +b := Convert_FP16_To_FP32(MEM[__A+15:__A])
+FOR j := 0 to 3
+ m := j*32
+ dst[m+31:m] := b
+ENDFOR
+dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + m := j*32 + dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+15:__A+m]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + m := j*32 + dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+15:__A+m]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + m := j*32 + dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+31:__A+m+16]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + m := j*32 + dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+31:__A+m+16]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 7 + dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
+ + + Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". + + +FOR j := 0 to 3 + dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) +ENDFOR +dst[MAX:128] := 0 + + + AVX_NE_CONVERT +
immintrin.h
+ Convert +
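The conversions above reduce to two scalar primitives. BF16 is the upper half of an IEEE 754 binary32, so widening is a 16-bit shift; the narrowing direction is modeled here with round-to-nearest-even, which is assumed to match Convert_FP32_To_BF16 (NaN handling omitted). Illustrative helpers, not intrinsics.

#include <stdint.h>
#include <string.h>

/* Widen BF16 to FP32: place the 16 bits in the top half of a binary32. */
static float bf16_to_fp32(uint16_t h)
{
    uint32_t x = (uint32_t)h << 16;
    float f;
    memcpy(&f, &x, 4);
    return f;
}

/* Narrow FP32 to BF16 with round-to-nearest-even (assumed rounding mode;
   NaN payloads are not handled specially in this sketch). */
static uint16_t fp32_to_bf16(float f)
{
    uint32_t x;
    memcpy(&x, &f, 4);
    uint32_t rounding = 0x7FFFu + ((x >> 16) & 1u);   /* ties to even */
    return (uint16_t)((x + rounding) >> 16);
}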
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) + tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) + tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) + tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := src.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) + tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) + dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + AVX_VNNI +
immintrin.h
+ Arithmetic +
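The AVX_VNNI entries repeat the VNNI dot-product semantics in VEX encoding, so they run without AVX-512 support. A hedged usage sketch follows; the name _mm256_dpbusd_avx_epi32 is assumed (intrinsic names are not preserved in this excerpt) and requires AVX-VNNI at compile and run time.

#include <stdint.h>
#include <immintrin.h>

/* Accumulate 4-wide u8*s8 dot products of a and b into acc's s32 lanes. */
static __m256i accumulate_u8s8_dot(__m256i acc, const uint8_t a[32], const int8_t b[32])
{
    __m256i va = _mm256_loadu_si256((const __m256i *)a);
    __m256i vb = _mm256_loadu_si256((const __m256i *)b);
    return _mm256_dpbusd_avx_epi32(acc, va, vb);   /* assumed name, see above */
}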
+ + + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate unsigned 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7
+ tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
+ tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+ dst.dword[j] := __W.dword[j] + tmp1 + tmp2
+ENDFOR
+dst[MAX:256] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate unsigned 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with unsigned saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7
+ tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
+ tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+ dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
+ENDFOR
+dst[MAX:256] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) + tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) + dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT16 +
immintrin.h
+ Arithmetic +
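For usage, a minimal C sketch of the unsigned-by-unsigned 16-bit form above; the intrinsic name _mm256_dpwuud_epi32 and the (__W, __A, __B) operand order are assumed from current GCC/Clang headers rather than stated in this data, so treat it as illustrative only (build with -mavxvnniint16):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i w = _mm256_setzero_si256();         /* 8 x i32 accumulators */
    __m256i a = _mm256_set1_epi16(3);           /* 16 x u16 */
    __m256i b = _mm256_set1_epi16(4);           /* 16 x u16 */
    /* assumed name: each dword gets 0 + 3*4 + 3*4 */
    __m256i r = _mm256_dpwuud_epi32(w, a, b);
    printf("%d\n", _mm256_extract_epi32(r, 0)); /* prints 24 */
    return 0;
}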
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) + tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) + tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) + tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) + tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) + tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) + tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) + tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) + tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) + tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) + tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) + tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) + tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) + tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) + tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) + tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:256] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with unsigned saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 7 + tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) + tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) + tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) + tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) + dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:256] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) + tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) + tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) + tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) + tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) + tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) + tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) + tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) + tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) + tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) + tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) + tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) + tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) + dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) + tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) + tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) + tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) + dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +ENDFOR +dst[MAX:128] := 0 + + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
+ + + Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with unsigned saturation, and store the packed 32-bit results in "dst". + + +FOR j := 0 to 3 + tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) + tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) + tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) + tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) + dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +ENDFOR +dst[MAX:128] := 0 + + + + AVX_VNNI_INT8 +
immintrin.h
+ Arithmetic +
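The signed-by-signed 8-bit form in C; as with the 16-bit family, the _mm_dpbssd_epi32 name and (__W, __A, __B) order are assumed from current compiler headers (build with -mavxvnniint8):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i w = _mm_set1_epi32(100);
    __m128i a = _mm_set1_epi8(-2);
    __m128i b = _mm_set1_epi8(5);
    /* assumed name: each dword gets 100 + 4 * (-2 * 5) = 60 */
    __m128i r = _mm_dpbssd_epi32(w, a, b);
    printf("%d\n", _mm_extract_epi32(r, 0)); /* prints 60 */
    return 0;
}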
+ + + + + + + Extract contiguous bits from unsigned 32-bit integer "a", and store the result in "dst". Extract the number of bits specified by "len", starting at the bit specified by "start". + +tmp[511:0] := a +dst[31:0] := ZeroExtend32(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + + Extract contiguous bits from unsigned 32-bit integer "a", and store the result in "dst". Extract the number of bits specified by bits 15:8 of "control", starting at the bit specified by bits 0:7 of "control". + +start := control[7:0] +len := control[15:8] +tmp[511:0] := a +dst[31:0] := ZeroExtend32(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + + + Extract contiguous bits from unsigned 64-bit integer "a", and store the result in "dst". Extract the number of bits specified by "len", starting at the bit specified by "start". + +tmp[511:0] := a +dst[63:0] := ZeroExtend64(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + + Extract contiguous bits from unsigned 64-bit integer "a", and store the result in "dst". Extract the number of bits specified by bits 15:8 of "control", starting at the bit specified by bits 0:7 of "control". + +start := control[7:0] +len := control[15:8] +tmp[511:0] := a +dst[63:0] := ZeroExtend64(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
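The explicit start/len form above is exposed as _bextr_u32/_bextr_u64 in immintrin.h; a small C example (build with -mbmi):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* extract 8 bits starting at bit 4 of 0xABCD: (0xABCD >> 4) & 0xFF */
    printf("0x%X\n", _bextr_u32(0xABCDu, 4, 8)); /* prints 0xBC */
    return 0;
}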
+ + + + Extract the lowest set bit from unsigned 32-bit integer "a" and set the corresponding bit in "dst". All other bits in "dst" are zeroed, and all bits are zeroed if no bits are set in "a". + +dst := (-a) AND a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Extract the lowest set bit from unsigned 64-bit integer "a" and set the corresponding bit in "dst". All other bits in "dst" are zeroed, and all bits are zeroed if no bits are set in "a". + +dst := (-a) AND a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Set all the lower bits of "dst" up to and including the lowest set bit in unsigned 32-bit integer "a". + +dst := (a - 1) XOR a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Set all the lower bits of "dst" up to and including the lowest set bit in unsigned 64-bit integer "a". + +dst := (a - 1) XOR a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Copy all bits from unsigned 32-bit integer "a" to "dst", and reset (set to 0) the bit in "dst" that corresponds to the lowest set bit in "a". + +dst := (a - 1) AND a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Copy all bits from unsigned 64-bit integer "a" to "dst", and reset (set to 0) the bit in "dst" that corresponds to the lowest set bit in "a". + +dst := (a - 1) AND a + + + BMI1 +
immintrin.h
+ Bit Manipulation +
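The three lowest-set-bit operations above (BLSI, BLSMSK, BLSR) are pure bit identities; applying them to a = 0x68 (01101000b) in C (build with -mbmi):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned a = 0x68;
    printf("0x%02X\n", _blsi_u32(a));   /* (-a) AND a   -> 0x08 */
    printf("0x%02X\n", _blsmsk_u32(a)); /* (a-1) XOR a  -> 0x0F */
    printf("0x%02X\n", _blsr_u32(a));   /* (a-1) AND a  -> 0x60 */
    return 0;
}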
+ + + + + Compute the bitwise NOT of 32-bit integer "a" and then AND with b, and store the results in dst. + +dst[31:0] := ((NOT a[31:0]) AND b[31:0]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + + Compute the bitwise NOT of 64-bit integer "a" and then AND with b, and store the results in dst. + +dst[63:0] := ((NOT a[63:0]) AND b[63:0]) + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of trailing zero bits in unsigned 16-bit integer "a", and return that count in "dst". + +tmp := 0 +dst := 0 +DO WHILE ((tmp < 16) AND a[tmp] == 0) + tmp := tmp + 1 + dst := dst + 1 +OD + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of trailing zero bits in unsigned 32-bit integer "a", and return that count in "dst". + +tmp := 0 +dst := 0 +DO WHILE ((tmp < 32) AND a[tmp] == 0) + tmp := tmp + 1 + dst := dst + 1 +OD + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of trailing zero bits in unsigned 64-bit integer "a", and return that count in "dst". + +tmp := 0 +dst := 0 +DO WHILE ((tmp < 64) AND a[tmp] == 0) + tmp := tmp + 1 + dst := dst + 1 +OD + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of trailing zero bits in unsigned 32-bit integer "a", and return that count in "dst". + +tmp := 0 +dst := 0 +DO WHILE ((tmp < 32) AND a[tmp] == 0) + tmp := tmp + 1 + dst := dst + 1 +OD + + + BMI1 +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of trailing zero bits in unsigned 64-bit integer "a", and return that count in "dst". + +tmp := 0 +dst := 0 +DO WHILE ((tmp < 64) AND a[tmp] == 0) + tmp := tmp + 1 + dst := dst + 1 +OD + + + BMI1 +
immintrin.h
+ Bit Manipulation +
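TZCNT combines naturally with BLSR for the classic set-bit iteration; a C sketch (build with -mbmi):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned a = 0x69;                /* bits 0, 3, 5, 6 set */
    while (a != 0) {
        printf("%u ", _tzcnt_u32(a)); /* index of the lowest set bit */
        a = _blsr_u32(a);             /* clear that bit */
    }
    return 0;                         /* prints: 0 3 5 6 */
}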
+ + + + + + + Copy all bits from unsigned 32-bit integer "a" to "dst", and reset (set to 0) the high bits in "dst" starting at "index". + +n := index[7:0] +dst := a +IF (n < 32) + dst[31:n] := 0 +FI + + + BMI2 +
immintrin.h
+ Bit Manipulation +
+ + + + + Copy all bits from unsigned 64-bit integer "a" to "dst", and reset (set to 0) the high bits in "dst" starting at "index". + +n := index[7:0] +dst := a +IF (n < 64) + dst[63:n] := 0 +FI + + + BMI2 +
immintrin.h
+ Bit Manipulation +
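BZHI is effectively a variable-width mask; in C (build with -mbmi2):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* zero bits [31:8], keeping only the low byte */
    printf("0x%X\n", _bzhi_u32(0xDEADBEEFu, 8)); /* prints 0xEF */
    return 0;
}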
+ + + + + Deposit contiguous low bits from unsigned 32-bit integer "a" to "dst" at the corresponding bit locations specified by "mask"; all other bits in "dst" are set to zero. + +tmp := a +dst := 0 +m := 0 +k := 0 +DO WHILE m < 32 + IF mask[m] == 1 + dst[m] := tmp[k] + k := k + 1 + FI + m := m + 1 +OD + + + BMI2 +
immintrin.h
+ Bit Manipulation +
+ + + + + Deposit contiguous low bits from unsigned 64-bit integer "a" to "dst" at the corresponding bit locations specified by "mask"; all other bits in "dst" are set to zero. + +tmp := a +dst := 0 +m := 0 +k := 0 +DO WHILE m < 64 + IF mask[m] == 1 + dst[m] := tmp[k] + k := k + 1 + FI + m := m + 1 +OD + + + BMI2 +
immintrin.h
+ Bit Manipulation +
+ + + + + Extract bits from unsigned 32-bit integer "a" at the corresponding bit locations specified by "mask" to contiguous low bits in "dst"; the remaining upper bits in "dst" are set to zero. + +tmp := a +dst := 0 +m := 0 +k := 0 +DO WHILE m < 32 + IF mask[m] == 1 + dst[k] := tmp[m] + k := k + 1 + FI + m := m + 1 +OD + + + BMI2 +
immintrin.h
+ Bit Manipulation +
+ + + + + Extract bits from unsigned 64-bit integer "a" at the corresponding bit locations specified by "mask" to contiguous low bits in "dst"; the remaining upper bits in "dst" are set to zero. + +tmp := a +dst := 0 +m := 0 +k := 0 +DO WHILE m < 64 + IF mask[m] == 1 + dst[k] := tmp[m] + k := k + 1 + FI + m := m + 1 +OD + + + BMI2 +
immintrin.h
+ Bit Manipulation +
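PEXT and PDEP are inverses over a fixed mask, which the following C round trip demonstrates (build with -mbmi2):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned mask = 0x0F0F0F0Fu;                    /* low nibble of each byte */
    unsigned packed = _pext_u32(0xCAFEBABEu, mask); /* gather  -> 0xAEAE */
    unsigned spread = _pdep_u32(packed, mask);      /* scatter -> 0x0A0E0A0E */
    printf("0x%X 0x%X\n", packed, spread);
    return 0;
}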
+ + + + + + Multiply unsigned 32-bit integers "a" and "b", store the low 32-bits of the result in "dst", and store the high 32-bits in "hi". This does not read or write arithmetic flags. + +dst[31:0] := (a * b)[31:0] +MEM[hi+31:hi] := (a * b)[63:32] + + + BMI2 +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply unsigned 64-bit integers "a" and "b", store the low 64-bits of the result in "dst", and store the high 64-bits in "hi". This does not read or write arithmetic flags. + +dst[63:0] := (a * b)[63:0] +MEM[hi+63:hi] := (a * b)[127:64] + + + BMI2 +
immintrin.h
+ Arithmetic +
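MULX gives the full 64x64 -> 128-bit product without disturbing the flags; in C (build with -mbmi2 on x86-64):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned long long hi;
    unsigned long long lo = _mulx_u64(0xFFFFFFFFFFFFFFFFull, 2, &hi);
    printf("hi=%llu lo=0x%llx\n", hi, lo); /* hi=1 lo=0xfffffffffffffffe */
    return 0;
}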
+ + + + + + Increment the shadow stack pointer by 4 times the value specified in bits [7:0] of "a". + +SSP := SSP + a[7:0] * 4 + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Increment the shadow stack pointer by 8 times the value specified in bits [7:0] of "a". + +SSP := SSP + a[7:0] * 8 + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Read the low 32-bits of the current shadow stack pointer, and store the result in "dst". + dst := SSP[31:0] + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Read the current shadow stack pointer, and store the result in "dst". + dst := SSP[63:0] + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Save the previous shadow stack pointer context. + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Restore the saved shadow stack pointer from the shadow stack restore token previously created on shadow stack by saveprevssp. + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + + Write 32-bit value in "val" to a shadow stack page in memory specified by "p". + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + + Write 64-bit value in "val" to a shadow stack page in memory specified by "p". + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + + Write 32-bit value in "val" to a user shadow stack page in memory specified by "p". + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + + Write 64-bit value in "val" to a user shadow stack page in memory specified by "p". + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Mark shadow stack pointed to by IA32_PL0_SSP as busy. + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + Mark shadow stack pointed to by "p" as not busy. + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + If CET is enabled, read the low 32-bits of the current shadow stack pointer, and store the result in "dst". Otherwise return 0. + dst := SSP[31:0] + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + If CET is enabled, read the current shadow stack pointer, and store the result in "dst". Otherwise return 0. + dst := SSP[63:0] + + + CET_SS +
immintrin.h
+ Miscellaneous +
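Of the CET_SS intrinsics above, the CET-checking read is the one that is safe to call unconditionally, since it returns 0 when shadow stacks are not enabled; a C sketch assuming the _get_ssp name from GCC/Clang headers (build with -mshstk):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* 0 unless the OS has enabled a shadow stack for this thread */
    printf("SSP = 0x%llx\n", (unsigned long long)_get_ssp());
    return 0;
}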
+ + + + Increment the shadow stack pointer by 4 times the value specified in bits [7:0] of "a". + +SSP := SSP + a[7:0] * 4 + + + CET_SS +
immintrin.h
+ Miscellaneous +
+ + + + + Hint to hardware that the cache line that contains "p" should be demoted from the cache closest to the processor core to a level more distant from the processor core. + + CLDEMOTE +
immintrin.h
+ Miscellaneous +
+ + + + + + Invalidate and flush the cache line that contains "p" from all levels of the cache hierarchy. + + CLFLUSHOPT +
immintrin.h
+ General Support +
+ + + + + + Write back to memory the cache line that contains "p" from any level of the cache hierarchy in the cache coherence domain. + + CLWB +
immintrin.h
+ General Support +
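CLFLUSHOPT and CLWB differ only in whether the line may stay cached after the write-back; a C helper sketch using the standard _mm_clflushopt/_mm_clwb names (the SFENCE is needed because both operations are weakly ordered):

#include <immintrin.h>

void write_back_line(void *p, int invalidate) {
    if (invalidate)
        _mm_clflushopt(p); /* write back and evict from every cache level */
    else
        _mm_clwb(p);       /* write back; the line may remain cached */
    _mm_sfence();          /* order the flush against later stores */
}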
+ + + + + + + + + Compare the value in memory pointed to by "__A" with the value of "__B". If the condition specified by "__D" is met, add the third operand "__C" to the value at "__A" and write the sum back to "__A"; otherwise the value at "__A" is unchanged. The return value is the original value at "__A". + CASE (__D[3:0]) OF +0: OP := _CMPCCX_O +1: OP := _CMPCCX_NO +2: OP := _CMPCCX_B +3: OP := _CMPCCX_NB +4: OP := _CMPCCX_Z +5: OP := _CMPCCX_NZ +6: OP := _CMPCCX_BE +7: OP := _CMPCCX_NBE +8: OP := _CMPCCX_S +9: OP := _CMPCCX_NS +10: OP := _CMPCCX_P +11: OP := _CMPCCX_NP +12: OP := _CMPCCX_L +13: OP := _CMPCCX_NL +14: OP := _CMPCCX_LE +15: OP := _CMPCCX_NLE +ESAC +tmp1 := LOAD_LOCK(__A) +tmp2 := tmp1 + __C +IF (tmp1[31:0] OP __B[31:0]) + STORE_UNLOCK(__A, tmp2) +ELSE + STORE_UNLOCK(__A, tmp1) +FI +dst[31:0] := tmp1[31:0] + + + + + + + + + + + + + + + + + + CMPCCXADD +
immintrin.h
+ Arithmetic +
+ + + + + + + Compare the value in memory pointed to by "__A" with the value of "__B". If the condition specified by "__D" is met, add the third operand "__C" to the value at "__A" and write the sum back to "__A"; otherwise the value at "__A" is unchanged. The return value is the original value at "__A". + CASE (__D[3:0]) OF +0: OP := _CMPCCX_O +1: OP := _CMPCCX_NO +2: OP := _CMPCCX_B +3: OP := _CMPCCX_NB +4: OP := _CMPCCX_Z +5: OP := _CMPCCX_NZ +6: OP := _CMPCCX_BE +7: OP := _CMPCCX_NBE +8: OP := _CMPCCX_S +9: OP := _CMPCCX_NS +10: OP := _CMPCCX_P +11: OP := _CMPCCX_NP +12: OP := _CMPCCX_L +13: OP := _CMPCCX_NL +14: OP := _CMPCCX_LE +15: OP := _CMPCCX_NLE +ESAC +tmp1 := LOAD_LOCK(__A) +tmp2 := tmp1 + __C +IF (tmp1[63:0] OP __B[63:0]) + STORE_UNLOCK(__A, tmp2) +ELSE + STORE_UNLOCK(__A, tmp1) +FI +dst[63:0] := tmp1[63:0] + + + + + + + + + + + + + + + + + + CMPCCXADD +
immintrin.h
+ Arithmetic +
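A usage sketch of the 32-bit form above; the _cmpccxadd_epi32 name, its (ptr, cmp, add, cond) argument order, and the _CMPCCX_NZ constant are assumed from recent compiler headers and should be checked against your immintrin.h (build with -mcmpccxadd):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    int v = 7;
    /* NZ condition: add 5 only if the old value differs from 0 */
    int old = _cmpccxadd_epi32(&v, 0, 5, _CMPCCX_NZ);
    printf("old=%d new=%d\n", old, v); /* old=7 new=12 */
    return 0;
}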
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 8-bit integer "v", and stores the result in "dst". + tmp1[7:0] := v[0:7] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[39:0] := tmp1[7:0] << 32 +tmp4[39:0] := tmp2[31:0] << 8 +tmp5[39:0] := tmp3[39:0] XOR tmp4[39:0] +tmp6[31:0] := MOD2(tmp5[39:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + CRC32 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 16-bit integer "v", and stores the result in "dst". + tmp1[15:0] := v[0:15] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[47:0] := tmp1[15:0] << 32 +tmp4[47:0] := tmp2[31:0] << 16 +tmp5[47:0] := tmp3[47:0] XOR tmp4[47:0] +tmp6[31:0] := MOD2(tmp5[47:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + CRC32 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 32-bit integer "v", and stores the result in "dst". + tmp1[31:0] := v[0:31] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[63:0] := tmp1[31:0] << 32 +tmp4[63:0] := tmp2[31:0] << 32 +tmp5[63:0] := tmp3[63:0] XOR tmp4[63:0] +tmp6[31:0] := MOD2(tmp5[63:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + CRC32 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 64-bit integer "v", and stores the result in "dst". + tmp1[63:0] := v[0:63] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[95:0] := tmp1[31:0] << 32 +tmp4[95:0] := tmp2[63:0] << 64 +tmp5[95:0] := tmp3[95:0] XOR tmp4[95:0] +tmp6[31:0] := MOD2(tmp5[95:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + CRC32 +
nmmintrin.h
+ Cryptography +
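The four widths all chain through the same polynomial (0x11EDC6F41, i.e. CRC-32C), so a byte-at-a-time loop in C reproduces the standard check value (build with -msse4.2):

#include <nmmintrin.h>
#include <stdio.h>
#include <string.h>

static unsigned crc32c(const unsigned char *p, size_t n, unsigned crc) {
    for (size_t i = 0; i < n; i++)
        crc = _mm_crc32_u8(crc, p[i]); /* accumulate one byte */
    return crc;
}

int main(void) {
    const char *s = "123456789";
    unsigned c = crc32c((const unsigned char *)s, strlen(s), 0xFFFFFFFFu);
    printf("0x%X\n", c ^ 0xFFFFFFFFu); /* 0xE3069283, the CRC-32C check value */
    return 0;
}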
+ + + + + + + Reads the 64-byte command pointed to by "__src", formats 64-byte enqueue store data, and performs a 64-byte enqueue store to the memory pointed to by "__dst". This intrinsic may only be used in User mode. + + ENQCMD +
immintrin.h
+ Unknown +
+ + + + + Reads the 64-byte command pointed to by "__src", formats 64-byte enqueue store data, and performs a 64-byte enqueue store to the memory pointed to by "__dst". This intrinsic may only be used in Privileged mode. + + ENQCMD +
immintrin.h
+ Unknown +
+ + + + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:256] := 0 + + + F16C +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_imm_note] + +FOR j := 0 to 7 + i := 16*j + l := 32*j + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) +ENDFOR +dst[MAX:128] := 0 + + + F16C +
immintrin.h
+ Convert +
+ + + + Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + m := j*16 + dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) +ENDFOR +dst[MAX:128] := 0 + + + F16C +
immintrin.h
+ Convert +
+ + + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". + [round_imm_note] + +FOR j := 0 to 3 + i := 16*j + l := 32*j + dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) +ENDFOR +dst[MAX:64] := 0 + + + F16C +
immintrin.h
+ Convert +
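A C round trip through half precision using the two 256-bit conversions above (build with -mf16c); 1.5 is exactly representable in FP16, so it survives unchanged:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 v = _mm256_set1_ps(1.5f);
    __m128i h = _mm256_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    __m256 back = _mm256_cvtph_ps(h);
    float out[8];
    _mm256_storeu_ps(out, back);
    printf("%f\n", out[0]); /* prints 1.500000 */
    return 0;
}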
+ + + + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
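The packed fmadd forms are the workhorse of dot products; a C sketch that keeps eight partial sums in one register and reduces at the end (build with -mfma; n is assumed to be a multiple of 8):

#include <immintrin.h>

float dot(const float *a, const float *b, int n) {
    __m256 acc = _mm256_setzero_ps();
    for (int i = 0; i < n; i += 8) /* acc = a*b + acc, eight lanes at once */
        acc = _mm256_fmadd_ps(_mm256_loadu_ps(a + i),
                              _mm256_loadu_ps(b + i), acc);
    float t[8], s = 0.0f;
    _mm256_storeu_ps(t, acc);
    for (int i = 0; i < 8; i++)    /* horizontal reduction */
        s += t[i];
    return s;
}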
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] + ELSE + dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] + ELSE + dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] + FI +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*64 + dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". + +FOR j := 0 to 7 + i := j*32 + dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] +ENDFOR +dst[MAX:256] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + + + FMA +
immintrin.h
+ Arithmetic +
+ + + + + Read the FS segment base register and store the 32-bit result in "dst". + dst[31:0] := FS_Segment_Base_Register +dst[63:32] := 0 + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + Read the FS segment base register and store the 64-bit result in "dst". + dst[63:0] := FS_Segment_Base_Register + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + Read the GS segment base register and store the 32-bit result in "dst". + dst[31:0] := GS_Segment_Base_Register +dst[63:32] := 0 + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + Read the GS segment base register and store the 64-bit result in "dst". + dst[63:0] := GS_Segment_Base_Register + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + + Write the unsigned 32-bit integer "a" to the FS segment base register. + +FS_Segment_Base_Register[31:0] := a[31:0] +FS_Segment_Base_Register[63:32] := 0 + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + + Write the unsigned 64-bit integer "a" to the FS segment base register. + +FS_Segment_Base_Register[63:0] := a[63:0] + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + + Write the unsigned 32-bit integer "a" to the GS segment base register. + +GS_Segment_Base_Register[31:0] := a[31:0] +GS_Segment_Base_Register[63:32] := 0 + + + FSGSBASE +
immintrin.h
+ General Support +
+ + + + Write the unsigned 64-bit integer "a" to the GS segment base register. + +GS_Segment_Base_Register[63:0] := a[63:0] + + + FSGSBASE +
immintrin.h
+ General Support +
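The read side is usable from user space only once the OS has set CR4.FSGSBASE (Linux 5.9 and later on supporting CPUs); a minimal C sketch (build with -mfsgsbase):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* faults with SIGILL if the OS has not enabled FSGSBASE */
    printf("FS.base = 0x%llx\n", (unsigned long long)_readfsbase_u64());
    return 0;
}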
+ + + + + + Reload the x87 FPU, MMX technology, XMM, and MXCSR registers from the 512-byte memory image at "mem_addr". This data should have been written to memory previously using the FXSAVE instruction, and in the same format as required by the operating mode. "mem_addr" must be aligned on a 16-byte boundary. + state_x87_fpu_mmx_sse := fxrstor(MEM[mem_addr+512*8:mem_addr]) + + + FXSR +
immintrin.h
+ OS-Targeted +
+ + + + Reload the x87 FPU, MMX technology, XMM, and MXCSR registers from the 512-byte memory image at "mem_addr". This data should have been written to memory previously using the FXSAVE64 instruction, and in the same format as required by the operating mode. "mem_addr" must be aligned on a 16-byte boundary. + state_x87_fpu_mmx_sse := fxrstor64(MEM[mem_addr+512*8:mem_addr]) + + + FXSR +
immintrin.h
+ OS-Targeted +
+ + + + Save the current state of the x87 FPU, MMX technology, XMM, and MXCSR registers to a 512-byte memory location at "mem_addr". The layout of the 512-byte region depends on the operating mode. Bytes [511:464] are available for software use and will not be overwritten by the processor. + MEM[mem_addr+512*8:mem_addr] := fxsave(state_x87_fpu_mmx_sse) + + + FXSR +
immintrin.h
+ OS-Targeted +
+ + + + Save the current state of the x87 FPU, MMX technology, XMM, and MXCSR registers to a 512-byte memory location at "mem_addr". The layout of the 512-byte region depends on the operating mode. Bytes [511:464] are available for software use and will not be overwritten by the processor. + MEM[mem_addr+512*8:mem_addr] := fxsave64(state_x87_fpu_mmx_sse) + + + FXSR +
immintrin.h
+ OS-Targeted +
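A C sketch of the save/restore pair; the only requirements visible above are the 512-byte, 16-byte-aligned image and the software-use bytes [511:464] (build with -mfxsr):

#include <immintrin.h>

static _Alignas(16) unsigned char image[512]; /* full FXSAVE layout */

void checkpoint(void) { _fxsave(image); }  /* x87/MMX/XMM/MXCSR out */
void rollback(void)   { _fxrstor(image); } /* and back in */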
+ + + + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 63 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := 0 + FI +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 63 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := src.byte[j] + FI +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 63 + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst". + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst". + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 7 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:512] := 0 + + + GFNI + AVX512F +
immintrin.h
+ Arithmetic +
+ + + + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 31 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := 0 + FI +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 31 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := src.byte[j] + FI +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 31 + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 15 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := 0 + FI +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 15 + IF k[j] + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) + ELSE + dst.byte[j] := src.byte[j] + FI +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. + +DEFINE gf2p8mul_byte(src1byte, src2byte) { + tword := 0 + FOR i := 0 to 7 + IF src2byte.bit[i] + tword := tword XOR (src1byte << i) + FI + ENDFOR + FOR i := 14 downto 8 + p := 0x11B << (i-8) + IF tword.bit[i] + tword := tword XOR p + FI + ENDFOR + RETURN tword.byte[0] +} +FOR j := 0 TO 15 + dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
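The unmasked 128-bit form is plain AES-field arithmetic; a C check of 0x02 * 0x87 (0x87 << 1 = 0x10E, reduced by 0x11B to 0x15; build with -mgfni):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi8((char)0x02);
    __m128i b = _mm_set1_epi8((char)0x87);
    __m128i r = _mm_gf2p8mul_epi8(a, b); /* per-byte GF(2^8) multiply */
    printf("0x%02X\n", _mm_extract_epi8(r, 0) & 0xFF); /* prints 0x15 */
    return 0;
}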
+ + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst". + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst". + +DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
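The affine_byte helper shared by the entries above also has a direct scalar reading: bit i of each result byte is the parity of (matrix row AND input byte) XORed with bit i of the immediate, with row 7-i selected from the qword. A C sketch under that reading (helper names ours, not a library API):

    #include <stdint.h>

    static uint8_t parity8(uint8_t x) {      /* XOR of all 8 bits */
        x ^= x >> 4; x ^= x >> 2; x ^= x >> 1;
        return x & 1;
    }

    /* A: 8x8 bit matrix packed into a qword; x: input byte; b: imm8. */
    static uint8_t affine_byte(uint64_t A, uint8_t x, uint8_t b) {
        uint8_t ret = 0;
        for (int i = 0; i < 8; i++) {
            uint8_t row = (uint8_t)(A >> (8 * (7 - i)));  /* byte[7-i] */
            ret |= (uint8_t)((parity8(row & x) ^ ((b >> i) & 1)) << i);
        }
        return ret;
    }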
+ + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst". + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 3 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:256] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := 0 + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + IF k[j*8+i] + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ELSE + dst.qword[j].byte[i] := src.qword[j].byte[i] + FI + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst". + DEFINE parity(x) { + t := 0 + FOR i := 0 to 7 + t := t XOR x.bit[i] + ENDFOR + RETURN t +} +DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { + FOR i := 0 to 7 + retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] + ENDFOR + RETURN retbyte +} +FOR j := 0 TO 1 + FOR i := 0 to 7 + dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) + ENDFOR +ENDFOR +dst[MAX:128] := 0 + + + GFNI + AVX512VL +
immintrin.h
+ Arithmetic +
+ + + + + + Provides a hint to the processor to selectively reset the prediction history of the current logical processor specified by a signed 32-bit integer "__eax". + + HRESET +
immintrin.h
+ General Support +
+ + + + + + Invalidate mappings in the Translation Lookaside Buffers (TLBs) and paging-structure caches for the processor context identifier (PCID) specified by "descriptor" based on the invalidation type specified in "type". + The PCID "descriptor" is specified as a 16-byte memory operand (with no alignment restrictions) where bits [11:0] specify the PCID, and bits [127:64] specify the linear address; bits [63:12] are reserved. + The types supported are: + 0) Individual-address invalidation: If "type" is 0, the logical processor invalidates mappings, except global translations, for the linear address and PCID specified in "descriptor". The instruction may also invalidate global translations, mappings for other linear addresses, or mappings tagged with other PCIDs. + 1) Single-context invalidation: If "type" is 1, the logical processor invalidates all mappings tagged with the PCID specified in "descriptor" except global translations. In some cases, it may invalidate mappings for other PCIDs as well. + 2) All-context invalidation: If "type" is 2, the logical processor invalidates all mappings tagged with any PCID. + 3) All-context invalidation, retaining global translations: If "type" is 3, the logical processor invalidates all mappings tagged with any PCID except global translations, ignoring "descriptor". The instruction may invalidate global translations as well. + +CASE type[1:0] OF
+0: // individual-address invalidation retaining global translations
+	OP_PCID := MEM[descriptor+11:descriptor]
+	ADDR := MEM[descriptor+127:descriptor+64]
+	BREAK
+1: // single PCID invalidation retaining globals
+	OP_PCID := MEM[descriptor+11:descriptor]
+	// invalidate all mappings tagged with OP_PCID except global translations
+	BREAK
+2: // all PCID invalidation
+	// invalidate all mappings tagged with any PCID
+	BREAK
+3: // all PCID invalidation retaining global translations
+	// invalidate all mappings tagged with any PCID except global translations
+	BREAK
+ESAC
+ + INVPCID +
immintrin.h
+ OS-Targeted +
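The 16-byte descriptor described above can be modeled as a plain pair of qwords. The struct and field names here are illustrative only; the layout (PCID in bits 11:0, linear address in bits 127:64, the rest reserved) comes from the text:

    #include <stdint.h>

    struct invpcid_desc {
        uint64_t pcid;            /* PCID in bits 11:0; bits 63:12 must be 0 */
        uint64_t linear_address;  /* bits 127:64 of the descriptor */
    };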
+ + + + Flag + + + + + Decrypt 10 rounds of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + MEM[__odata+127:__odata] := AES128Decrypt (__idata[127:0], __h[383:0]) +dst := ZF + + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Decrypt 10 rounds of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + MEM[__odata+127:__odata] := AES256Decrypt (__idata[127:0], __h[511:0]) +dst := ZF + + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Encrypt 10 rounds of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + MEM[__odata+127:__odata] := AES128Encrypt (__idata[127:0], __h[383:0])
+dst := ZF
+ + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Encrypt 10 rounds of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + MEM[__odata+127:__odata] := AES256Encrypt (__idata[127:0], __h[511:0]) +dst := ZF + + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Wrap a 128-bit AES key from "__key" into a 384-bit key stored in "__h" and set IWKey's NoBackup and KeySource bits in "dst". The explicit source operand "__htype" specifies "__h" restrictions. + __h[383:0] := WrapKey128(__key[127:0], __htype)
+dst[0] := IWKey.NoBackup
+dst[4:1] := IWKey.KeySource[3:0]
+ + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + + Wrap a 256-bit AES key from "__key_hi" and "__key_lo" into a 512-bit key stored in "__h" and set IWKey's NoBackup and KeySource bits in "dst". The 32-bit "__htype" specifies __h restrictions. + __h[511:0] := WrapKey256(__key_lo[127:0], __key_hi[127:0], __htype) +dst[0] := IWKey.NoBackup +dst[4:1] := IWKey.KeySource[3:0] + + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + + Load internal wrapping key (IWKey). The 32-bit unsigned integer "__ctl" specifies IWKey's KeySource and whether backing up the key is permitted. IWKey's 256-bit encryption key is loaded from "__enkey_lo" and "__enkey_hi". IWKey's 128-bit integrity key is loaded from "__intkey". + + KEYLOCKER +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Decrypt 10 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + FOR i := 0 to 7 + __odata[i] := AES128Decrypt (__idata[i], __h[383:0]) +ENDFOR +dst := ZF + + + KEYLOCKER_WIDE +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Decrypt 10 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + FOR i := 0 to 7 + __odata[i] := AES256Decrypt (__idata[i], __h[511:0]) +ENDFOR +dst := ZF + + + KEYLOCKER_WIDE +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Encrypt 10 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + FOR i := 0 to 7 + __odata[i] := AES128Encrypt (__idata[i], __h[383:0]) +ENDFOR +dst := ZF + + + KEYLOCKER_WIDE +
immintrin.h
+ Cryptography +
+ + Flag + + + + + Encrypt 10 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If exception happens, set ZF flag to 1 and zero initialize "__odata". + FOR i := 0 to 7
+	__odata[i] := AES256Encrypt (__idata[i], __h[511:0])
+ENDFOR
+dst := ZF
+ + KEYLOCKER_WIDE +
immintrin.h
+ Cryptography +
+ + + + + Count the number of leading zero bits in unsigned 32-bit integer "a", and return that count in "dst". + +tmp := 31 +dst := 0 +DO WHILE (tmp >= 0 AND a[tmp] == 0) + tmp := tmp - 1 + dst := dst + 1 +OD + + + LZCNT +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of leading zero bits in unsigned 64-bit integer "a", and return that count in "dst". + +tmp := 63 +dst := 0 +DO WHILE (tmp >= 0 AND a[tmp] == 0) + tmp := tmp - 1 + dst := dst + 1 +OD + + + LZCNT +
immintrin.h
+ Bit Manipulation +
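Assuming the usual immintrin.h spellings for the two entries above (_lzcnt_u32 and _lzcnt_u64; intrinsic names are not shown in this data), usage is straightforward. Note that per the pseudocode a zero input yields the full operand width rather than an undefined result:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {             /* build with LZCNT enabled, e.g. -mlzcnt */
        printf("%u\n", _lzcnt_u32(0x00080000u));   /* bit 19 set -> 12 */
        printf("%u\n", _lzcnt_u32(0));             /* 32: loop runs out */
        printf("%u\n", (unsigned)_lzcnt_u64(1));   /* 63 */
        return 0;
    }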
+ + + + + + Copy 64-bit integer "a" to "dst". + +dst[63:0] := a[63:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy 64-bit integer "a" to "dst". + +dst[63:0] := a[63:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy 32-bit integer "a" to the lower elements of "dst", and zero the upper element of "dst". + +dst[31:0] := a[31:0] +dst[63:32] := 0 + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy the lower 32-bit integer in "a" to "dst". + +dst[31:0] := a[31:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy 32-bit integer "a" to the lower elements of "dst", and zero the upper element of "dst". + +dst[31:0] := a[31:0] +dst[63:32] := 0 + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy the lower 32-bit integer in "a" to "dst". + +dst[31:0] := a[31:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy 64-bit integer "a" to "dst". + +dst[63:0] := a[63:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Copy 64-bit integer "a" to "dst". + +dst[63:0] := a[63:0] + + + MMX +
mmintrin.h
+ Convert +
+ + + + Empty the MMX state, which marks the x87 FPU registers as available for use by x87 instructions. This instruction must be used at the end of all MMX technology procedures. + + MMX +
mmintrin.h
+ General Support +
+ + + + Empty the MMX state, which marks the x87 FPU registers as available for use by x87 instructions. This instruction must be used at the end of all MMX technology procedures. + + MMX +
mmintrin.h
+ General Support +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". + +dst[7:0] := Saturate8(a[15:0]) +dst[15:8] := Saturate8(a[31:16]) +dst[23:16] := Saturate8(a[47:32]) +dst[31:24] := Saturate8(a[63:48]) +dst[39:32] := Saturate8(b[15:0]) +dst[47:40] := Saturate8(b[31:16]) +dst[55:48] := Saturate8(b[47:32]) +dst[63:56] := Saturate8(b[63:48]) + + + MMX +
mmintrin.h
+ Miscellaneous +
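A scalar model of the signed-saturating pack above: Saturate8 clamps each 16-bit lane to [-128, 127], and the lanes of "a" fill the low half of the result (helper names ours):

    #include <stdint.h>

    static int8_t saturate8(int16_t v) {
        return v > 127 ? 127 : v < -128 ? -128 : (int8_t)v;
    }

    /* Pack two 4-lane 16-bit vectors into one 8-lane 8-bit vector. */
    static void packs_16_to_8(const int16_t a[4], const int16_t b[4],
                              int8_t dst[8]) {
        for (int j = 0; j < 4; j++) {
            dst[j]     = saturate8(a[j]);
            dst[j + 4] = saturate8(b[j]);
        }
    }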
+ + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". + +dst[15:0] := Saturate16(a[31:0]) +dst[31:16] := Saturate16(a[63:32]) +dst[47:32] := Saturate16(b[31:0]) +dst[63:48] := Saturate16(b[63:32]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". + +dst[7:0] := SaturateU8(a[15:0]) +dst[15:8] := SaturateU8(a[31:16]) +dst[23:16] := SaturateU8(a[47:32]) +dst[31:24] := SaturateU8(a[63:48]) +dst[39:32] := SaturateU8(b[15:0]) +dst[47:40] := SaturateU8(b[31:16]) +dst[55:48] := SaturateU8(b[47:32]) +dst[63:56] := SaturateU8(b[63:48]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". + +dst[7:0] := Saturate8(a[15:0]) +dst[15:8] := Saturate8(a[31:16]) +dst[23:16] := Saturate8(a[47:32]) +dst[31:24] := Saturate8(a[63:48]) +dst[39:32] := Saturate8(b[15:0]) +dst[47:40] := Saturate8(b[31:16]) +dst[55:48] := Saturate8(b[47:32]) +dst[63:56] := Saturate8(b[63:48]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". + +dst[15:0] := Saturate16(a[31:0]) +dst[31:16] := Saturate16(a[63:32]) +dst[47:32] := Saturate16(b[31:0]) +dst[63:48] := Saturate16(b[63:32]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". + +dst[7:0] := SaturateU8(a[15:0]) +dst[15:8] := SaturateU8(a[31:16]) +dst[23:16] := SaturateU8(a[47:32]) +dst[31:24] := SaturateU8(a[63:48]) +dst[39:32] := SaturateU8(b[15:0]) +dst[47:40] := SaturateU8(b[31:16]) +dst[55:48] := SaturateU8(b[47:32]) +dst[63:56] := SaturateU8(b[63:48]) + + + MMX +
mmintrin.h
+ Miscellaneous +
+ + + + + Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_BYTES(src1[63:0], src2[63:0]) { + dst[7:0] := src1[39:32] + dst[15:8] := src2[39:32] + dst[23:16] := src1[47:40] + dst[31:24] := src2[47:40] + dst[39:32] := src1[55:48] + dst[47:40] := src2[55:48] + dst[55:48] := src1[63:56] + dst[63:56] := src2[63:56] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_HIGH_BYTES(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_WORDS(src1[63:0], src2[63:0]) { + dst[15:0] := src1[47:32] + dst[31:16] := src2[47:32] + dst[47:32] := src1[63:48] + dst[63:48] := src2[63:48] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_HIGH_WORDS(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst". + +dst[31:0] := a[63:32] +dst[63:32] := b[63:32] + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_BYTES(src1[63:0], src2[63:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_BYTES(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_WORDS(src1[63:0], src2[63:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_WORDS(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst". + +dst[31:0] := a[31:0] +dst[63:32] := b[31:0] + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_BYTES(src1[63:0], src2[63:0]) { + dst[7:0] := src1[39:32] + dst[15:8] := src2[39:32] + dst[23:16] := src1[47:40] + dst[31:24] := src2[47:40] + dst[39:32] := src1[55:48] + dst[47:40] := src2[55:48] + dst[55:48] := src1[63:56] + dst[63:56] := src2[63:56] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_HIGH_BYTES(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_WORDS(src1[63:0], src2[63:0]) { + dst[15:0] := src1[47:32] + dst[31:16] := src2[47:32] + dst[47:32] := src1[63:48] + dst[63:48] := src2[63:48] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_HIGH_WORDS(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst". + +dst[31:0] := a[63:32] +dst[63:32] := b[63:32] + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_BYTES(src1[63:0], src2[63:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_BYTES(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_WORDS(src1[63:0], src2[63:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + RETURN dst[63:0] +} +dst[63:0] := INTERLEAVE_WORDS(a[63:0], b[63:0]) + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst". + +dst[31:0] := a[31:0] +dst[63:32] := b[31:0] + + + MMX +
mmintrin.h
+ Swizzle +
+ + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := a[i+7:i] + b[i+7:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := a[i+15:i] + b[i+15:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
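The saturating adds above clamp at the type's limits instead of wrapping; per-lane scalar models (helper names ours):

    #include <stdint.h>

    static uint8_t addsat_u8(uint8_t a, uint8_t b) {     /* SaturateU8 */
        unsigned s = (unsigned)a + b;
        return (uint8_t)(s > 255u ? 255u : s);
    }

    static int16_t addsat_s16(int16_t a, int16_t b) {    /* Saturate16 */
        int s = (int)a + b;
        return (int16_t)(s > 32767 ? 32767 : s < -32768 ? -32768 : s);
    }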
+ + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := a[i+7:i] - b[i+7:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := a[i+15:i] - b[i+15:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
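The multiply-and-horizontal-add above produces one 32-bit lane from each adjacent pair of 16-bit lanes, with the 16x16 products sign-extended to 32 bits before the add. A scalar model (name ours):

    #include <stdint.h>

    /* dst[j] = a[2j]*b[2j] + a[2j+1]*b[2j+1], products sign-extended. */
    static void madd_16(const int16_t a[4], const int16_t b[4],
                        int32_t dst[2]) {
        for (int j = 0; j < 2; j++)
            dst[j] = (int32_t)a[2*j] * b[2*j]
                   + (int32_t)a[2*j + 1] * b[2*j + 1];
    }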
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[15:0] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := a[i+7:i] + b[i+7:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := a[i+15:i] + b[i+15:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := a[i+7:i] - b[i+7:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := a[i+15:i] - b[i+15:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[15:0] +ENDFOR + + + MMX +
mmintrin.h
+ Arithmetic +
+ + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
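One detail worth noting in the shift entries: a count larger than the element width zeroes the lane (or fills it with sign bits, for the arithmetic forms), whereas a plain C shift by that amount is undefined. A per-lane model of the 16-bit logical left shift (name ours):

    #include <stdint.h>

    static void slli_16(const uint16_t a[4], unsigned count,
                        uint16_t dst[4]) {
        for (int j = 0; j < 4; j++)
            dst[j] = count > 15 ? 0 : (uint16_t)(a[j] << count);
    }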
+ + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" left by "count" while shifting in zeros, and store the result in "dst". + +IF count[63:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] << count[63:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" left by "imm8" while shifting in zeros, and store the result in "dst". + +IF imm8[7:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] << imm8[7:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" right by "count" while shifting in zeros, and store the result in "dst". + +IF count[63:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] >> count[63:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" right by "imm8" while shifting in zeros, and store the result in "dst". + +IF imm8[7:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] >> imm8[7:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" left by "count" while shifting in zeros, and store the result in "dst". + +IF count[63:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] << count[63:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" left by "imm8" while shifting in zeros, and store the result in "dst". + +IF imm8[7:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] << imm8[7:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" right by "count" while shifting in zeros, and store the result in "dst". + +IF count[63:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] >> count[63:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Shift 64-bit integer "a" right by "imm8" while shifting in zeros, and store the result in "dst". + +IF imm8[7:0] > 63 + dst[63:0] := 0 +ELSE + dst[63:0] := ZeroExtend64(a[63:0] >> imm8[7:0]) +FI + + + MMX +
mmintrin.h
+ Shift +
+ + + + + Compute the bitwise AND of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] AND b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 64 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". + +dst[63:0] := ((NOT a[63:0]) AND b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] OR b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] XOR b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] AND b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 64 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". + +dst[63:0] := ((NOT a[63:0]) AND b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] OR b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[63:0] := (a[63:0] XOR b[63:0]) + + + MMX +
mmintrin.h
+ Logical +
+ + + + + Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + MMX +
mmintrin.h
+ Compare +
+ + + + Return vector of type __m64 with all elements set to zero. + +dst[MAX:0] := 0 + + + MMX +
mmintrin.h
+ Set +
+ + + + + Set packed 32-bit integers in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 + + MMX +
mmintrin.h
+ Set +
+ + + + + + + Set packed 16-bit integers in "dst" with the supplied values. + +dst[15:0] := e0 +dst[31:16] := e1 +dst[47:32] := e2 +dst[63:48] := e3 + + MMX +
mmintrin.h
+ Set +
+ + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values. + +dst[7:0] := e0 +dst[15:8] := e1 +dst[23:16] := e2 +dst[31:24] := e3 +dst[39:32] := e4 +dst[47:40] := e5 +dst[55:48] := e6 +dst[63:56] := e7 + + MMX +
mmintrin.h
+ Set +
+ + + + Broadcast 32-bit integer "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR + + MMX +
mmintrin.h
+ Set +
+ + + + Broadcast 16-bit integer "a" to all elements of "dst". + +FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := a[15:0]
+ENDFOR
+ MMX +
mmintrin.h
+ Set +
+ + + + Broadcast 8-bit integer "a" to all elements of "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR + + MMX +
mmintrin.h
+ Set +
+ + + + + Set packed 32-bit integers in "dst" with the supplied values in reverse order. + +dst[31:0] := e1 +dst[63:32] := e0 + + MMX +
mmintrin.h
+ Set +
+ + + + + + + Set packed 16-bit integers in "dst" with the supplied values in reverse order. + +dst[15:0] := e3 +dst[31:16] := e2 +dst[47:32] := e1 +dst[63:48] := e0 + + MMX +
mmintrin.h
+ Set +
+ + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values in reverse order. + +dst[7:0] := e7 +dst[15:8] := e6 +dst[23:16] := e5 +dst[31:24] := e4 +dst[39:32] := e3 +dst[47:40] := e2 +dst[55:48] := e1 +dst[63:56] := e0 + + MMX +
mmintrin.h
+ Set +
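The set and setr families above differ only in argument order: the plain form lists the highest lane first (e3 lands in bits 63:48 of the 16-bit version), while the "r" form lists lanes in ascending order. Assuming the usual mmintrin.h spellings (intrinsic names are not shown in this data), these two calls build the same value:

    #include <mmintrin.h>

    void set_demo(void) {
        __m64 x = _mm_set_pi16(3, 2, 1, 0);   /* e3..e0: lane 0 holds 0 */
        __m64 y = _mm_setr_pi16(0, 1, 2, 3);  /* same lanes, listed low-first */
        (void)x; (void)y;
        _mm_empty();   /* release the MMX state, per the entries above */
    }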
+ + + + + + + + Arm address monitoring hardware using the address specified in "p". A store to an address within the specified address range triggers the monitoring hardware. Specify optional extensions in "extensions", and optional hints in "hints". + + MONITOR +
pmmintrin.h
+ General Support +
+ + + + + Hint to the processor that it can enter an implementation-dependent-optimized state while waiting for an event or store operation to the address range specified by MONITOR. + + MONITOR +
pmmintrin.h
+ General Support +
+ + + + + + Load 16 bits from memory, perform a byte swap operation, and store the result in "dst". + +FOR j := 0 to 1 + i := j*8 + dst[i+7:i] := MEM[ptr+15-i:ptr+8-i] +ENDFOR + + + MOVBE +
immintrin.h
+ Load +
+ + + + Load 32 bits from memory, perform a byte swap operation, and store the result in "dst". + +FOR j := 0 to 3 + i := j*8 + dst[i+7:i] := MEM[ptr+31-i:ptr+24-i] +ENDFOR + + + MOVBE +
immintrin.h
+ Load +
+ + + + Load 64 bits from memory, perform a byte swap operation, and store the result in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := MEM[ptr+63-i:ptr+56-i] +ENDFOR + + + MOVBE +
immintrin.h
+ Load +
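A scalar model of the 32-bit byte-swapping load above, written for a little-endian host so the bit ranges line up with the pseudocode (name ours):

    #include <stdint.h>
    #include <string.h>

    static uint32_t loadbe_32(const void *ptr) {
        uint32_t v;
        memcpy(&v, ptr, sizeof v);            /* MEM[ptr+31:ptr] */
        return (v >> 24) | ((v >> 8) & 0x0000FF00u)
             | ((v << 8) & 0x00FF0000u) | (v << 24);
    }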
+ + + + + Perform a byte swap operation of the 16 bits in "data", and store the results to memory. + +FOR j := 0 to 1
+	i := j*8
+	MEM[ptr+i+7:ptr+i] := data[15-i:8-i]
+ENDFOR
+ + MOVBE +
immintrin.h
+ Store +
+ + + + + Perform a byte swap operation of the 32 bits in "data", and store the results to memory. + +FOR j := 0 to 3
+	i := j*8
+	MEM[ptr+i+7:ptr+i] := data[31-i:24-i]
+ENDFOR
+ + MOVBE +
immintrin.h
+ Store +
+ + + + + Perform a byte swap operation of the 64 bits in "data", and store the results to memory. + +FOR j := 0 to 7
+	i := j*8
+	MEM[ptr+i+7:ptr+i] := data[63-i:56-i]
+ENDFOR
+ + MOVBE +
immintrin.h
+ Store +
+ + + + + + + Move 64-byte (512-bit) value using direct store from source memory address "src" to destination memory address "dst". + +MEM[dst+511:dst] := MEM[src+511:src] + + + MOVDIR64B +
immintrin.h
+ Store +
+ + + + + + + Store 64-bit integer from "val" into memory using direct store. + +MEM[dst+63:dst] := val[63:0] + + + MOVDIRI +
immintrin.h
+ Store +
+ + + + + Store 32-bit integer from "val" into memory using direct store. + +MEM[dst+31:dst] := val[31:0] + + + MOVDIRI +
immintrin.h
+ Store +
+ + + + + + + Make a pointer with the value of "srcmem" and bounds set to ["srcmem", "srcmem" + "size" - 1], and store the result in "dst". + dst := srcmem +dst.LB := srcmem.LB +dst.UB := srcmem + size - 1 + + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + + + Narrow the bounds for pointer "q" to the intersection of the bounds of "r" and the bounds ["q", "q" + "size" - 1], and store the result in "dst". + dst := q +IF r.LB > (q + size - 1) OR r.UB < q + dst.LB := 1 + dst.UB := 0 +ELSE + dst.LB := MAX(r.LB, q) + dst.UB := MIN(r.UB, (q + size - 1)) +FI + + MPX +
immintrin.h
+ Miscellaneous + +
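The narrowing rule above is interval intersection, with the disjoint case encoded as an inverted pair (LB = 1, UB = 0) so that every later bounds check fails. A scalar sketch; the struct is illustrative, not the MPX register format:

    #include <stddef.h>
    #include <stdint.h>

    struct bnd { uintptr_t lb, ub; };

    static struct bnd bnd_narrow(struct bnd r, uintptr_t q, size_t size) {
        struct bnd d;
        if (r.lb > q + size - 1 || r.ub < q) {
            d.lb = 1; d.ub = 0;                        /* always-fail bounds */
        } else {
            d.lb = r.lb > q ? r.lb : q;                /* MAX(r.LB, q) */
            d.ub = r.ub < q + size - 1 ? r.ub
                                       : q + size - 1; /* MIN(r.UB, ...) */
        }
        return d;
    }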
+ + + + + Make a pointer with the value of "q" and bounds set to the bounds of "r" (e.g. copy the bounds of "r" to pointer "q"), and store the result in "dst". + dst := q +dst.LB := r.LB +dst.UB := r.UB + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + Make a pointer with the value of "q" and open bounds, which allow the pointer to access the entire virtual address space, and store the result in "dst". + dst := q +dst.LB := 0 +dst.UB := 0 + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + + Stores the bounds of "ptr_val" pointer in memory at address "ptr_addr". + MEM[ptr_addr].LB := ptr_val.LB +MEM[ptr_addr].UB := ptr_val.UB + + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + Checks if "q" is within its lower bound, and throws a #BR if not. + IF q < q.LB + #BR +FI + + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + Checks if "q" is within its upper bound, and throws a #BR if not. + IF q > q.UB + #BR +FI + + + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + + Checks if ["q", "q" + "size" - 1] is within the lower and upper bounds of "q" and throws a #BR if not. + IF (q + size - 1) < q.LB OR (q + size - 1) > q.UB + #BR +FI + + + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + Return the lower bound of "q". + dst := q.LB + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + Return the upper bound of "q". + dst := q.UB + + MPX +
immintrin.h
+ Miscellaneous + +
+ + + + + Set "dst" to the index of the lowest set bit in 32-bit integer "a". If no bits are set in "a" then "dst" is undefined. + +tmp := 0 +IF a == 0 + // dst is undefined +ELSE + DO WHILE ((tmp < 32) AND a[tmp] == 0) + tmp := tmp + 1 + OD +FI +dst := tmp + + +
immintrin.h
+ Bit Manipulation +
+ + + + Set "dst" to the index of the highest set bit in 32-bit integer "a". If no bits are set in "a" then "dst" is undefined. + +tmp := 31 +IF a == 0 + // dst is undefined +ELSE + DO WHILE ((tmp > 0) AND a[tmp] == 0) + tmp := tmp - 1 + OD +FI +dst := tmp + + +
immintrin.h
+ Bit Manipulation +
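Scalar models of the two scans above; as the text says, the result is undefined for a zero input, so these require a != 0 (names ours):

    #include <stdint.h>

    static int scan_forward32(uint32_t a) {   /* index of lowest set bit */
        int i = 0;
        while (!(a & (1u << i))) i++;
        return i;
    }

    static int scan_reverse32(uint32_t a) {   /* index of highest set bit */
        int i = 31;
        while (!(a & (1u << i))) i--;
        return i;
    }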
+ + + + + Set "index" to the index of the lowest set bit in 32-bit integer "mask". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. + +tmp := 0 +IF a == 0 + // MEM[index+31:index] is undefined + dst := 0 +ELSE + DO WHILE ((tmp < 32) AND a[tmp] == 0) + tmp := tmp + 1 + OD + MEM[index+31:index] := tmp + dst := (tmp == 31) ? 0 : 1 +FI + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Set "index" to the index of the highest set bit in 32-bit integer "mask". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. + +tmp := 31 +IF a == 0 + // MEM[index+31:index] is undefined + dst := 0 +ELSE + DO WHILE ((tmp > 0) AND a[tmp] == 0) + tmp := tmp - 1 + OD + MEM[index+31:index] := tmp + dst := (tmp == 0) ? 0 : 1 +FI + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Set "index" to the index of the lowest set bit in 32-bit integer "mask". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. + +tmp := 0 +IF a == 0 + // MEM[index+31:index] is undefined + dst := 0 +ELSE + DO WHILE ((tmp < 64) AND a[tmp] == 0) + tmp := tmp + 1 + OD + MEM[index+31:index] := tmp + dst := (tmp == 63) ? 0 : 1 +FI + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Set "index" to the index of the highest set bit in 32-bit integer "mask". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. + +tmp := 63 +IF a == 0 + // MEM[index+31:index] is undefined + dst := 0 +ELSE + DO WHILE ((tmp > 0) AND a[tmp] == 0) + tmp := tmp - 1 + OD + MEM[index+31:index] := tmp + dst := (tmp == 0) ? 0 : 1 +FI + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 32-bit integer "a". + +addr := a + ZeroExtend64(b) +dst[0] := MEM[addr] + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 32-bit integer "a", and set that bit to its complement. + +addr := a + ZeroExtend64(b) +dst[0] := MEM[addr] +MEM[addr] := ~dst[0] + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 32-bit integer "a", and set that bit to zero. + +addr := a + ZeroExtend64(b) +dst[0] := MEM[addr] +MEM[addr] := 0 + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 32-bit integer "a", and set that bit to one. + +addr := a + ZeroExtend64(b) +dst[0] := MEM[addr] +MEM[addr] := 1 + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 64-bit integer "a". + +addr := a + b +dst[0] := MEM[addr] + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 64-bit integer "a", and set that bit to its complement. + +addr := a + b +dst[0] := MEM[addr] +MEM[addr] := ~dst[0] + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 64-bit integer "a", and set that bit to zero. + +addr := a + b +dst[0] := MEM[addr] +MEM[addr] := 0 + + +
immintrin.h
+ Bit Manipulation +
+ + + + + Return the bit at index "b" of 64-bit integer "a", and set that bit to one. + +addr := a + b +dst[0] := MEM[addr] +MEM[addr] := 1 + + +
immintrin.h
+ Bit Manipulation +
+ + + + Reverse the byte order of 32-bit integer "a", and store the result in "dst". This intrinsic is provided for conversion between little and big endian values. + +dst[7:0] := a[31:24] +dst[15:8] := a[23:16] +dst[23:16] := a[15:8] +dst[31:24] := a[7:0] + + +
immintrin.h
+ Bit Manipulation +
+ + + + Reverse the byte order of 64-bit integer "a", and store the result in "dst". This intrinsic is provided for conversion between little and big endian values. + +dst[7:0] := a[63:56] +dst[15:8] := a[55:48] +dst[23:16] := a[47:40] +dst[31:24] := a[39:32] +dst[39:32] := a[31:24] +dst[47:40] := a[23:16] +dst[55:48] := a[15:8] +dst[63:56] := a[7:0] + + +
immintrin.h
+ Bit Manipulation +
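+ A short C sketch of the byte-swap intrinsics above (illustrative; assumes the int/__int64 prototypes listed for this header):
+
+#include <immintrin.h>
+
+/* Convert a little-endian 32-bit value to big-endian (and back). */
+int to_big_endian32(int le) {
+    return _bswap(le);     /* e.g. 0x11223344 -> 0x44332211 */
+}
+
+__int64 to_big_endian64(__int64 le) {
+    return _bswap64(le);
+}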
+ + + + Cast from type float to type unsigned __int32 without conversion. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +
immintrin.h
+ Cast +
+ + + + Cast from type double to type unsigned __int64 without conversion. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +
immintrin.h
+ Cast +
+ + + + Cast from type unsigned __int32 to type float without conversion. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +
immintrin.h
+ Cast +
+ + + + Cast from type unsigned __int64 to type double without conversion. + This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +
immintrin.h
+ Cast +
+ + + + + Shift the bits of unsigned long integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". + // size := 32 or 64 +dst := a +count := shift AND (size - 1) +DO WHILE (count > 0) + tmp[0] := dst[size - 1] + dst := (dst << 1) OR tmp[0] + count := count - 1 +OD + + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned long integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". + // size := 32 or 64 +dst := a +count := shift AND (size - 1) +DO WHILE (count > 0) + tmp[size - 1] := dst[0] + dst := (dst >> 1) OR tmp[size - 1] + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 32-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 31 +DO WHILE (count > 0) + tmp[0] := dst[31] + dst := (dst << 1) OR tmp[0] + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 32-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 31 +DO WHILE (count > 0) + tmp[31] := dst[0] + dst := (dst >> 1) OR tmp + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 16-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 15 +DO WHILE (count > 0) + tmp[0] := dst[15] + dst := (dst << 1) OR tmp[0] + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 16-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 15 +DO WHILE (count > 0) + tmp[15] := dst[0] + dst := (dst >> 1) OR tmp + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 64-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 63 +DO WHILE (count > 0) + tmp[0] := dst[63] + dst := (dst << 1) OR tmp[0] + count := count - 1 +OD + + +
immintrin.h
+ Shift +
+ + + + + Shift the bits of unsigned 64-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". + +dst := a +count := shift AND 63 +DO WHILE (count > 0) + tmp[63] := dst[0] + dst := (dst >> 1) OR tmp[63] + count := count - 1 +OD + + +
immintrin.h
+ Shift +
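+ A C sketch of the fixed-width rotates (illustrative; assumes the unsigned int prototypes for _rotl/_rotr given above):
+
+#include <immintrin.h>
+
+unsigned int demo_rotate(void) {
+    unsigned int l = _rotl(0x80000001u, 1); /* bit 31 wraps to bit 0: 0x00000003 */
+    unsigned int r = _rotr(0x80000001u, 1); /* bit 0 wraps to bit 31: 0xC0000000 */
+    return l ^ r;
+}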
+ + + + Treat the processor-specific feature(s) specified in "a" as available. Multiple features may be OR'd together. See the valid feature flags below: + +_FEATURE_GENERIC_IA32 +_FEATURE_FPU +_FEATURE_CMOV +_FEATURE_MMX +_FEATURE_FXSAVE +_FEATURE_SSE +_FEATURE_SSE2 +_FEATURE_SSE3 +_FEATURE_SSSE3 +_FEATURE_SSE4_1 +_FEATURE_SSE4_2 +_FEATURE_MOVBE +_FEATURE_POPCNT +_FEATURE_PCLMULQDQ +_FEATURE_AES +_FEATURE_F16C +_FEATURE_AVX +_FEATURE_RDRND +_FEATURE_FMA +_FEATURE_BMI +_FEATURE_LZCNT +_FEATURE_HLE +_FEATURE_RTM +_FEATURE_AVX2 +_FEATURE_KNCNI +_FEATURE_AVX512F +_FEATURE_ADX +_FEATURE_RDSEED +_FEATURE_AVX512ER +_FEATURE_AVX512PF +_FEATURE_AVX512CD +_FEATURE_SHA +_FEATURE_MPX +_FEATURE_AVX512BW +_FEATURE_AVX512VL +_FEATURE_AVX512VBMI +_FEATURE_AVX512_4FMAPS +_FEATURE_AVX512_4VNNIW +_FEATURE_AVX512_VPOPCNTDQ +_FEATURE_AVX512_BITALG +_FEATURE_AVX512_VBMI2 +_FEATURE_GFNI +_FEATURE_VAES +_FEATURE_VPCLMULQDQ +_FEATURE_AVX512_VNNI +_FEATURE_CLWB +_FEATURE_RDPID +_FEATURE_IBT +_FEATURE_SHSTK +_FEATURE_SGX +_FEATURE_WBNOINVD +_FEATURE_PCONFIG +_FEATURE_AXV512_4VNNIB +_FEATURE_AXV512_4FMAPH +_FEATURE_AXV512_BITALG2 +_FEATURE_AXV512_VP2INTERSECT + +
immintrin.h
+ General Support +
+ + + + Dynamically query the processor to determine if the processor-specific feature(s) specified in "a" are available, and return true or false (1 or 0) if the set of features is available. Multiple features may be OR'd together. This function is limited to bitmask values in the first 'page' of the libirc cpu-id information. This intrinsic does not check the processor vendor. See the valid feature flags below: + +_FEATURE_GENERIC_IA32 +_FEATURE_FPU +_FEATURE_CMOV +_FEATURE_MMX +_FEATURE_FXSAVE +_FEATURE_SSE +_FEATURE_SSE2 +_FEATURE_SSE3 +_FEATURE_SSSE3 +_FEATURE_SSE4_1 +_FEATURE_SSE4_2 +_FEATURE_MOVBE +_FEATURE_POPCNT +_FEATURE_PCLMULQDQ +_FEATURE_AES +_FEATURE_F16C +_FEATURE_AVX +_FEATURE_RDRND +_FEATURE_FMA +_FEATURE_BMI +_FEATURE_LZCNT +_FEATURE_HLE +_FEATURE_RTM +_FEATURE_AVX2 +_FEATURE_KNCNI +_FEATURE_AVX512F +_FEATURE_ADX +_FEATURE_RDSEED +_FEATURE_AVX512ER +_FEATURE_AVX512PF +_FEATURE_AVX512CD +_FEATURE_SHA +_FEATURE_MPX +_FEATURE_AVX512BW +_FEATURE_AVX512VL +_FEATURE_AVX512VBMI +_FEATURE_AVX512_4FMAPS +_FEATURE_AVX512_4VNNIW +_FEATURE_AVX512_VPOPCNTDQ +_FEATURE_AVX512_BITALG +_FEATURE_AVX512_VBMI2 +_FEATURE_GFNI +_FEATURE_VAES +_FEATURE_VPCLMULQDQ +_FEATURE_AVX512_VNNI +_FEATURE_CLWB +_FEATURE_RDPID +_FEATURE_IBT +_FEATURE_SHSTK +_FEATURE_SGX +_FEATURE_WBNOINVD +_FEATURE_PCONFIG +_FEATURE_AXV512_4VNNIB +_FEATURE_AXV512_4FMAPH +_FEATURE_AXV512_BITALG2 +_FEATURE_AXV512_VP2INTERSECT +_FEATURE_AXV512_FP16 + +
immintrin.h
+ General Support +
+ + + + + Dynamically query the processor to determine if the processor-specific feature(s) specified in "a" are available, and return true or false (1 or 0) if the set of features is available. Multiple features may be OR'd together. This works identically to the previous variant, except it also accepts a 'page' index that permits checking features on the 2nd page of the libirc information. When provided with a '0' in the 'page' parameter, this works identically to _may_i_use_cpu_feature. This intrinsic does not check the processor vendor. See the valid feature flags on the 2nd page below: (provided with a '1' in the 'page' parameter) + +_FEATURE_CLDEMOTE +_FEATURE_MOVDIRI +_FEATURE_MOVDIR64B +_FEATURE_WAITPKG +_FEATURE_AVX512_Bf16 +_FEATURE_ENQCMD +_FEATURE_AVX_VNNI +_FEATURE_AMX_TILE +_FEATURE_AMX_INT8 +_FEATURE_AMX_BF16 +_FEATURE_KL +_FEATURE_WIDE_KL +_FEATURE_HRESET +_FEATURE_UINTR +_FEATURE_PREFETCHI +_FEATURE_AVXVNNIINT8 +_FEATURE_CMPCCXADD +_FEATURE_AVXIFMA +_FEATURE_AVXNECONVERT +_FEATURE_RAOINT +_FEATURE_AMX_FP16 +_FEATURE_AMX_COMPLEX +_FEATURE_SHA512 +_FEATURE_SM3 +_FEATURE_SM4 +_FEATURE_AVXVNNIINT16 +_FEATURE_USERMSR +_FEATURE_AVX10_1_256 +_FEATURE_AVX10_1_512 +_FEATURE_APXF +_FEATURE_MSRLIST +_FEATURE_WRMSRNS +_FEATURE_PBNDKB + +
immintrin.h
+ General Support +
+ + + + Dynamically query the processor to determine if the processor-specific feature(s) specified as a series of compile-time string literals in "feature, ..." are available, and return true or false (1 or 0) if the set of features is available. These feature names are converted to a bitmask and use the same infrastructure as _may_i_use_cpu_feature_ext for validation. The behavior is the same as the previous variants. This intrinsic does not check the processor vendor. Supported string literals correspond one-to-one to the flags listed in the "Operation" sections of _may_i_use_cpu_feature and _may_i_use_cpu_feature_ext. Example string literals are "avx2", "bmi", "avx512fp16", "amx-int8"... 
+
+
immintrin.h
+ General Support +
+ + + + Read the Performance Monitor Counter (PMC) specified by "a", and store up to 64 bits in "dst". The width of performance counters is implementation specific. 
+	dst[63:0] := ReadPMC(a)
+
+
immintrin.h
+ General Support +
+ + + + + + + Add unsigned 32-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry flag), and store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[32:0] := a[31:0] + b[31:0] + (c_in > 0 ? 1 : 0) +MEM[out+31:out] := tmp[31:0] +dst[0] := tmp[32] +dst[7:1] := 0 + + +
immintrin.h
+ Arithmetic +
+ + + + + + + Add unsigned 64-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry flag), and store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[64:0] := a[63:0] + b[63:0] + (c_in > 0 ? 1 : 0) +MEM[out+63:out] := tmp[63:0] +dst[0] := tmp[64] +dst[7:1] := 0 + + +
immintrin.h
+ Arithmetic +
+ + + + + + + Add unsigned 8-bit borrow "c_in" (carry flag) to unsigned 32-bit integer "b", and subtract the result from unsigned 32-bit integer "a". Store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[32:0] := a[31:0] - (b[31:0] + (c_in > 0 ? 1 : 0)) +MEM[out+31:out] := tmp[31:0] +dst[0] := tmp[32] +dst[7:1] := 0 + + +
immintrin.h
+ Arithmetic +
+ + + + + + + Add unsigned 8-bit borrow "c_in" (carry flag) to unsigned 64-bit integer "b", and subtract the result from unsigned 64-bit integer "a". Store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag). + +tmp[64:0] := a[63:0] - (b[63:0] + (c_in > 0 ? 1 : 0)) +MEM[out+63:out] := tmp[63:0] +dst[0] := tmp[64] +dst[7:1] := 0 + + +
immintrin.h
+ Arithmetic +
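+ The carry-out/carry-in chaining above enables multi-word arithmetic. A C sketch (illustrative; assumes a 64-bit target and the unsigned long long operand types shown for this header):
+
+#include <immintrin.h>
+
+/* 128-bit addition from two 64-bit limbs; returns the final carry-out. */
+unsigned char add128(unsigned long long a_lo, unsigned long long a_hi,
+                     unsigned long long b_lo, unsigned long long b_hi,
+                     unsigned long long *r_lo, unsigned long long *r_hi) {
+    unsigned char c = _addcarry_u64(0, a_lo, b_lo, r_lo); /* CF out of the low limb */
+    return _addcarry_u64(c, a_hi, b_hi, r_hi);            /* CF chained into the high limb */
+}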
+ + + + Insert the 32-bit data from "a" into a Processor Trace stream via a PTW packet. The PTW packet will be inserted if tracing is currently enabled and ptwrite is currently enabled. The current IP will also be inserted via a FUP packet if FUPonPTW is enabled. + +
immintrin.h
+ Miscellaneous +
+ + + + Insert the 64-bit data from "a" into a Processor Trace stream via a PTW packet. The PTW packet will be inserted if tracing is currently enabled and ptwrite is currently enabled. The current IP will also be inserted via a FUP packet if FUPonPTW is enabled. + +
immintrin.h
+ Miscellaneous +
+ + + + + Invoke the Intel SGX enclave user (non-privileged) leaf function specified by "a", and return the error code. The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. 
+
immintrin.h
+ Miscellaneous +
+ + + + + Invoke the Intel SGX enclave system (privileged) leaf function specified by "a", and return the error code. The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. + +
immintrin.h
+ Miscellaneous +
+ + + + + Invoke the Intel SGX enclave virtualized (VMM) leaf function specified by "a", and return the error code. The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. + +
immintrin.h
+ Miscellaneous +
+ + + + Write back and flush internal caches. + Initiate writing-back and flushing of external + caches. + +
immintrin.h
+ Miscellaneous +
+ + + + Convert the half-precision (16-bit) floating-point value "a" to a single-precision (32-bit) floating-point value, and store the result in "dst". + +dst[31:0] := Convert_FP16_To_FP32(a[15:0]) + +
emmintrin.h
+ Convert +
+ + + + + Convert the single-precision (32-bit) floating-point value "a" to a half-precision (16-bit) floating-point value, and store the result in "dst". + [round_note] + +dst[15:0] := Convert_FP32_To_FP16(a[31:0]) + +
emmintrin.h
+ Convert +
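+ A scalar round-trip sketch in C (illustrative; assumes the F16C helpers _cvtsh_ss/_cvtss_sh described above, with the rounding control passed as an immediate):
+
+#include <immintrin.h>
+
+unsigned short roundtrip_half(unsigned short h) {
+    float f = _cvtsh_ss(h); /* FP16 -> FP32 is exact */
+    /* FP32 -> FP16, round-to-nearest-even, exceptions suppressed */
+    return _cvtss_sh(f, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}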
+ + + + + + + Perform a carry-less multiplication of two 64-bit integers, selected from "a" and "b" according to "imm8", and store the results in "dst". 
+
+IF (imm8[0] == 0)
+	TEMP1 := a[63:0]
+ELSE
+	TEMP1 := a[127:64]
+FI
+IF (imm8[4] == 0)
+	TEMP2 := b[63:0]
+ELSE
+	TEMP2 := b[127:64]
+FI
+FOR i := 0 to 63
+	TEMP[i] := (TEMP1[0] AND TEMP2[i])
+	FOR j := 1 to i
+		TEMP[i] := TEMP[i] XOR (TEMP1[j] AND TEMP2[i-j])
+	ENDFOR
+	dst[i] := TEMP[i]
+ENDFOR
+FOR i := 64 to 127
+	TEMP[i] := 0
+	FOR j := (i - 63) to 63
+		TEMP[i] := TEMP[i] XOR (TEMP1[j] AND TEMP2[i-j])
+	ENDFOR
+	dst[i] := TEMP[i]
+ENDFOR
+dst[127] := 0
+
+
+ PCLMULQDQ
+
wmmintrin.h
+ Application-Targeted +
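+ A C sketch of carry-less multiplication (illustrative; PCLMULQDQ operates on vector registers, so the 64-bit inputs are first placed into __m128i lanes):
+
+#include <wmmintrin.h>
+
+/* GF(2)[x] product of two 64-bit polynomials; full 128-bit result. */
+__m128i clmul64(unsigned long long a, unsigned long long b) {
+    __m128i va = _mm_set_epi64x(0, (long long)a);
+    __m128i vb = _mm_set_epi64x(0, (long long)b);
+    return _mm_clmulepi64_si128(va, vb, 0x00); /* imm8 = 0: low qword of each operand */
+}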
+ + + + + + + Invoke the PCONFIG leaf function specified by "a". The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. May return the value in eax, depending on the semantics of the specified leaf function. + + PCONFIG +
immintrin.h
+ Miscellaneous +
+ + + + + + Count the number of bits set to 1 in unsigned 32-bit integer "a", and return that count in "dst". + +dst := 0 +FOR i := 0 to 31 + IF a[i] + dst := dst + 1 + FI +ENDFOR + + + POPCNT +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of bits set to 1 in unsigned 64-bit integer "a", and return that count in "dst". + +dst := 0 +FOR i := 0 to 63 + IF a[i] + dst := dst + 1 + FI +ENDFOR + + + POPCNT +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of bits set to 1 in 32-bit integer "a", and return that count in "dst". + +dst := 0 +FOR i := 0 to 31 + IF a[i] + dst := dst + 1 + FI +ENDFOR + + + POPCNT +
immintrin.h
+ Bit Manipulation +
+ + + + Count the number of bits set to 1 in 64-bit integer "a", and return that count in "dst". + +dst := 0 +FOR i := 0 to 63 + IF a[i] + dst := dst + 1 + FI +ENDFOR + + + POPCNT +
immintrin.h
+ Bit Manipulation +
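+ A C sketch of the population-count intrinsics above (illustrative; _mm_popcnt_u64 is only available on 64-bit targets, and POPCNT support is assumed):
+
+#include <immintrin.h>
+
+int demo_popcnt(void) {
+    int n32 = _mm_popcnt_u32(0x0000F00Fu);         /* 8 */
+    long long n64 = _mm_popcnt_u64(0xFFull << 32); /* 8 */
+    return n32 + (int)n64;                         /* 16 */
+}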
+ + + + + + Load an instruction sequence containing the specified memory address into all levels of the cache hierarchy. 
+
+ PREFETCHI
+
x86gprintrin.h
+ General Support +
+ + + + Load an instruction sequence containing the specified memory address into all but the first-level cache. 
+
+ PREFETCHI
+
x86gprintrin.h
+ General Support +
+ + + + + Fetch the line of data from memory that contains address "p" to a location in the cache hierarchy specified by the locality hint "i", which can be one of:<ul>
+   <li>_MM_HINT_ET0 // 7, move data using the ET0 hint. The PREFETCHW instruction will be generated.</li>
+   <li>_MM_HINT_T0 // 3, move data using the T0 hint. The PREFETCHT0 instruction will be generated.</li>
+   <li>_MM_HINT_T1 // 2, move data using the T1 hint. The PREFETCHT1 instruction will be generated.</li>
+   <li>_MM_HINT_T2 // 1, move data using the T2 hint. The PREFETCHT2 instruction will be generated.</li>
+   <li>_MM_HINT_NTA // 0, move data using the non-temporal access (NTA) hint. The PREFETCHNTA instruction will be generated.</li></ul>
+
+
+
+
+
+
+ PRFCHW
+
immintrin.h
+ General Support +
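+ A C sketch of a software prefetch ahead of a streaming loop (illustrative; the look-ahead distance of 16 elements is an arbitrary tuning choice, not a documented value):
+
+#include <xmmintrin.h>
+
+float sum(const float *p, int n) {
+    float s = 0.0f;
+    for (int i = 0; i < n; i++) {
+        if (i + 16 < n)
+            _mm_prefetch((const char *)&p[i + 16], _MM_HINT_T0); /* warm all cache levels */
+        s += p[i];
+    }
+    return s;
+}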
+ + + + + Atomically add a 32-bit value at memory operand "__A" and a 32-bit "__B", and store the result to the same memory location. + + +MEM[__A+31:__A] := MEM[__A+31:__A] + __B[31:0] + + + + RAO_INT +
x86gprintrin.h
+ Arithmetic +
+ + + Atomically add a 64-bit value at memory operand "__A" and a 64-bit "__B", and store the result to the same memory location. + + +MEM[__A+63:__A] := MEM[__A+63:__A] + __B[63:0] + + + + RAO_INT +
x86gprintrin.h
+ Arithmetic +
+ + + Atomically AND a 32-bit value at memory operand "__A" and a 32-bit "__B", and store the result to the same memory location. 
+
+
+MEM[__A+31:__A] := MEM[__A+31:__A] AND __B[31:0]
+
+
+
+ RAO_INT
+
x86gprintrin.h
+ Arithmetic +
+ + + Atomically AND a 64-bit value at memory operand "__A" and a 64-bit "__B", and store the result to the same memory location. 
+
+
+MEM[__A+63:__A] := MEM[__A+63:__A] AND __B[63:0]
+
+
+
+ RAO_INT
+
x86gprintrin.h
+ Arithmetic +
+ + + Atomically OR a 32-bit value at memory operand "__A" and a 32-bit "__B", and store the result to the same memory location. 
+
+
+MEM[__A+31:__A] := MEM[__A+31:__A] OR __B[31:0]
+
+
+
+ RAO_INT
+
x86gprintrin.h
+ Arithmetic +
+ + + Atomically OR a 64-bit value at memory operand "__A" and a 64-bit "__B", and store the result to the same memory location. 
+
+
+MEM[__A+63:__A] := MEM[__A+63:__A] OR __B[63:0]
+
+
+
+ RAO_INT
+
x86gprintrin.h
+ Arithmetic +
+ + + Atomically XOR a 32-bit value at memory operand "__A" and a 32-bit "__B", and store the result to the same memory location. 
+
+
+MEM[__A+31:__A] := MEM[__A+31:__A] XOR __B[31:0]
+
+
+
+ RAO_INT
+
x86gprintrin.h
+ Arithmetic +
+ + + Atomically XOR a 64-bit value at memory operand "__A" and a 64-bit "__B", and store the result to the same memory location. 
+
+
+MEM[__A+63:__A] := MEM[__A+63:__A] XOR __B[63:0]
+
+
+
+ RAO_INT
+
x86gprintrin.h
+ Arithmetic +
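+ A C sketch of a remote-atomic counter update (illustrative; assumes the _aadd_i32 spelling used by recent compilers for RAO-INT, and that these operations return no value):
+
+#include <x86gprintrin.h>
+
+void bump_counter(int *counter) {
+    /* Fire-and-forget atomic add: unlike lock xadd, no old value is returned. */
+    _aadd_i32(counter, 1);
+}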
+ + + + Copy the IA32_TSC_AUX MSR (signature value) into "dst". + dst[31:0] := IA32_TSC_AUX[31:0] + + + RDPID +
immintrin.h
+ General Support +
+ + + + + + Read a hardware generated 16-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_RND_GEN.ready == 1 + val[15:0] := HW_RND_GEN.data + dst := 1 +ELSE + val[15:0] := 0 + dst := 0 +FI + + + RDRAND +
immintrin.h
+ Random +
+ + + + Read a hardware generated 32-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_RND_GEN.ready == 1 + val[31:0] := HW_RND_GEN.data + dst := 1 +ELSE + val[31:0] := 0 + dst := 0 +FI + + + RDRAND +
immintrin.h
+ Random +
+ + + + Read a hardware generated 64-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_RND_GEN.ready == 1 + val[63:0] := HW_RND_GEN.data + dst := 1 +ELSE + val[63:0] := 0 + dst := 0 +FI + + + RDRAND +
immintrin.h
+ Random +
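+ Because the generator can transiently report "not ready", callers conventionally retry. A C sketch (illustrative; the retry bound of 10 is a common convention, not part of the interface):
+
+#include <immintrin.h>
+
+int rand_u64(unsigned long long *out) {
+    for (int i = 0; i < 10; i++)
+        if (_rdrand64_step(out)) /* returns 1 when a value was produced */
+            return 1;
+    return 0; /* generator persistently not ready */
+}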
+ + + + + + Read a 16-bit NIST SP800-90B and SP800-90C compliant random value and store in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_NRND_GEN.ready == 1 + val[15:0] := HW_NRND_GEN.data + dst := 1 +ELSE + val[15:0] := 0 + dst := 0 +FI + + + RDSEED +
immintrin.h
+ Random +
+ + + + Read a 32-bit NIST SP800-90B and SP800-90C compliant random value and store in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_NRND_GEN.ready == 1 + val[31:0] := HW_NRND_GEN.data + dst := 1 +ELSE + val[31:0] := 0 + dst := 0 +FI + + + RDSEED +
immintrin.h
+ Random +
+ + + + Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store in "val". Return 1 if a random value was generated, and 0 otherwise. + IF HW_NRND_GEN.ready == 1 + val[63:0] := HW_NRND_GEN.data + dst := 1 +ELSE + val[63:0] := 0 + dst := 0 +FI + + + RDSEED +
immintrin.h
+ Random +
+ + + + + + Copy the current 64-bit value of the processor's time-stamp counter into "dst", and store the IA32_TSC_AUX MSR (signature value) into memory at "mem_addr". + dst[63:0] := TimeStampCounter +MEM[mem_addr+31:mem_addr] := IA32_TSC_AUX[31:0] + + + RDTSCP +
immintrin.h
+ General Support +
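+ A C timing sketch (illustrative; RDTSCP also returns the IA32_TSC_AUX value, which can be compared across samples to detect core migration):
+
+#include <immintrin.h>
+
+unsigned long long cycles(void (*fn)(void)) {
+    unsigned int aux0, aux1;
+    unsigned long long t0 = __rdtscp(&aux0);
+    fn();
+    unsigned long long t1 = __rdtscp(&aux1);
+    /* aux0 != aux1 would indicate the thread moved between cores/sockets */
+    return t1 - t0;
+}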
+ + + + + + Force an RTM abort. The EAX register is updated to reflect that an XABORT instruction caused the abort, and the "imm8" parameter will be provided in bits [31:24] of EAX. 
+	Following an RTM abort, the logical processor resumes execution at the fallback address computed through the outermost XBEGIN instruction. 
+	IF RTM_ACTIVE == 0
+	// nop
+ELSE
+	// restore architectural register state
+	// discard memory updates performed in transaction
+	// update EAX with status and imm8 value
+	eax[31:24] := imm8[7:0]
+	RTM_NEST_COUNT := 0
+	RTM_ACTIVE := 0
+	IF _64_BIT_MODE
+		RIP := fallbackRIP
+	ELSE
+		EIP := fallbackEIP
+	FI
+FI
+
+
+ RTM
+
immintrin.h
+ General Support +
+ + + + Specify the start of an RTM code region. 
+	If the logical processor was not already in transactional execution, then this call causes the logical processor to transition into transactional execution. 
+	On an RTM abort, the logical processor discards all architectural register and memory updates performed during the RTM execution, restores architectural state, and starts execution beginning at the fallback address computed from the outermost XBEGIN instruction. Return status of ~0 (0xFFFFFFFF) if continuing inside transaction; all other codes are aborts. 
+	IF RTM_NEST_COUNT < MAX_RTM_NEST_COUNT
+	RTM_NEST_COUNT := RTM_NEST_COUNT + 1
+	IF RTM_NEST_COUNT == 1
+		IF _64_BIT_MODE
+			fallbackRIP := RIP
+		ELSE IF _32_BIT_MODE
+			fallbackEIP := EIP
+		FI
+		
+		RTM_ACTIVE := 1
+		// enter RTM execution, record register state, start tracking memory state
+	FI
+ELSE
+	// RTM abort (see _xabort)
+FI
+
+
+ RTM
+
immintrin.h
+ General Support +
+ + + + Specify the end of an RTM code region. + If this corresponds to the outermost scope, the logical processor will attempt to commit the logical processor state atomically. + If the commit fails, the logical processor will perform an RTM abort. + IF RTM_ACTIVE == 1 + RTM_NEST_COUNT := RTM_NEST_COUNT - 1 + IF RTM_NEST_COUNT == 0 + // try to commit transaction + IF FAIL_TO_COMMIT_TRANSACTION + // RTM abort (see _xabort) + ELSE + RTM_ACTIVE := 0 + FI + FI +FI + + + RTM +
immintrin.h
+ General Support +
+ + + + Query the transactional execution status, return 1 if inside a transactionally executing RTM or HLE region, and return 0 otherwise. + IF (RTM_ACTIVE == 1 OR HLE_ACTIVE == 1) + dst := 1 +ELSE + dst := 0 +FI + + + RTM +
immintrin.h
+ General Support +
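+ The canonical RTM usage pattern in C (illustrative; a real implementation needs a non-transactional fallback path, sketched here only as a comment):
+
+#include <immintrin.h>
+
+long shared_counter;
+
+void increment(void) {
+    unsigned int status = _xbegin();
+    if (status == _XBEGIN_STARTED) {
+        shared_counter++; /* memory update tracked by the transaction */
+        _xend();          /* attempt atomic commit */
+    } else {
+        /* aborted: status encodes the cause; fall back to a lock here */
+    }
+}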
+ + + + + Serialize instruction execution, ensuring all modifications to flags, registers, and memory by previous instructions are completed before the next instruction is fetched. + + SERIALIZE +
immintrin.h
+ General Support +
+ + + + + + + Perform an intermediate calculation for the next four SHA1 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst". + +W0 := a[127:96] +W1 := a[95:64] +W2 := a[63:32] +W3 := a[31:0] +W4 := b[127:96] +W5 := b[95:64] +dst[127:96] := W2 XOR W0 +dst[95:64] := W3 XOR W1 +dst[63:32] := W4 XOR W2 +dst[31:0] := W5 XOR W3 + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + Perform the final calculation for the next four SHA1 message values (unsigned 32-bit integers) using the intermediate result in "a" and the previous message values in "b", and store the result in "dst". + +W13 := b[95:64] +W14 := b[63:32] +W15 := b[31:0] +W16 := (a[127:96] XOR W13) <<< 1 +W17 := (a[95:64] XOR W14) <<< 1 +W18 := (a[63:32] XOR W15) <<< 1 +W19 := (a[31:0] XOR W16) <<< 1 +dst[127:96] := W16 +dst[95:64] := W17 +dst[63:32] := W18 +dst[31:0] := W19 + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + Calculate SHA1 state variable E after four rounds of operation from the current SHA1 state variable "a", add that value to the scheduled values (unsigned 32-bit integers) in "b", and store the result in "dst". + +tmp := (a[127:96] <<< 30) +dst[127:96] := b[127:96] + tmp +dst[95:64] := b[95:64] +dst[63:32] := b[63:32] +dst[31:0] := b[31:0] + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + + Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) from "a" and some pre-computed sum of the next 4 round message values (unsigned 32-bit integers), and state variable E from "b", and store the updated SHA1 state (A,B,C,D) in "dst". "func" contains the logic functions and round constants. + IF (func[1:0] == 0) + f := f0() + K := K0 +ELSE IF (func[1:0] == 1) + f := f1() + K := K1 +ELSE IF (func[1:0] == 2) + f := f2() + K := K2 +ELSE IF (func[1:0] == 3) + f := f3() + K := K3 +FI +A := a[127:96] +B := a[95:64] +C := a[63:32] +D := a[31:0] +W[0] := b[127:96] +W[1] := b[95:64] +W[2] := b[63:32] +W[3] := b[31:0] +A[1] := f(B, C, D) + (A <<< 5) + W[0] + K +B[1] := A +C[1] := B <<< 30 +D[1] := C +E[1] := D +FOR i := 1 to 3 + A[i+1] := f(B[i], C[i], D[i]) + (A[i] <<< 5) + W[i] + E[i] + K + B[i+1] := A[i] + C[i+1] := B[i] <<< 30 + D[i+1] := C[i] + E[i+1] := D[i] +ENDFOR +dst[127:96] := A[4] +dst[95:64] := B[4] +dst[63:32] := C[4] +dst[31:0] := D[4] + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + Perform an intermediate calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst". + W4 := b[31:0] +W3 := a[127:96] +W2 := a[95:64] +W1 := a[63:32] +W0 := a[31:0] +dst[127:96] := W3 + sigma0(W4) +dst[95:64] := W2 + sigma0(W3) +dst[63:32] := W1 + sigma0(W2) +dst[31:0] := W0 + sigma0(W1) + + + SHA +
immintrin.h
+ Cryptography +
+ + + + + Perform the final calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst". 
+	W14 := b[95:64]
+W15 := b[127:96]
+W16 := a[31:0] + sigma1(W14)
+W17 := a[63:32] + sigma1(W15)
+W18 := a[95:64] + sigma1(W16)
+W19 := a[127:96] + sigma1(W17)
+dst[127:96] := W19
+dst[95:64] := W18
+dst[63:32] := W17
+dst[31:0] := W16
+
+
+ SHA
+
immintrin.h
+ Cryptography +
+ + + + + + Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from "a", an initial SHA256 state (A,B,E,F) from "b", and a pre-computed sum of the next 2 round message values (unsigned 32-bit integers) and the corresponding round constants from "k", and store the updated SHA256 state (A,B,E,F) in "dst". + A[0] := b[127:96] +B[0] := b[95:64] +C[0] := a[127:96] +D[0] := a[95:64] +E[0] := b[63:32] +F[0] := b[31:0] +G[0] := a[63:32] +H[0] := a[31:0] +W_K[0] := k[31:0] +W_K[1] := k[63:32] +FOR i := 0 to 1 + A[i+1] := Ch(E[i], F[i], G[i]) + sum1(E[i]) + W_K[i] + H[i] + Maj(A[i], B[i], C[i]) + sum0(A[i]) + B[i+1] := A[i] + C[i+1] := B[i] + D[i+1] := C[i] + E[i+1] := Ch(E[i], F[i], G[i]) + sum1(E[i]) + W_K[i] + H[i] + D[i] + F[i+1] := E[i] + G[i+1] := F[i] + H[i+1] := G[i] +ENDFOR +dst[127:96] := A[2] +dst[95:64] := B[2] +dst[63:32] := E[2] +dst[31:0] := F[2] + + + SHA +
immintrin.h
+ Cryptography +
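+ The two message intrinsics above are designed to be chained with a plain add of the middle words. A widely used scheduling step, sketched in C (illustrative; each register holds four message words, w0 = W3:W0 ... w3 = W15:W12):
+
+#include <immintrin.h>
+
+/* Derive W19:W16 from the previous sixteen SHA-256 message words. */
+__m128i sha256_next4(__m128i w0, __m128i w1, __m128i w2, __m128i w3) {
+    __m128i t = _mm_sha256msg1_epu32(w0, w1);         /* W0..W3 + sigma0(W1..W4) */
+    t = _mm_add_epi32(t, _mm_alignr_epi8(w3, w2, 4)); /* + W9..W12 */
+    return _mm_sha256msg2_epu32(t, w3);               /* + sigma1(W14..W17) */
+}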
+ + + + + This intrinsic is one of the two SHA512 message scheduling instructions. The intrinsic performs an intermediate calculation for the next four SHA512 message qwords. The calculated results are stored in "dst". 
+
+
+DEFINE ROR64(qword, n) {
+	count := n % 64
+	dest := (qword >> count) | (qword << (64 - count))
+	RETURN dest
+}
+DEFINE SHR64(qword, n) {
+	RETURN qword >> n
+}
+DEFINE s0(qword) {
+	RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7)
+}
+W.qword[4] := __B.qword[0]
+W.qword[3] := __A.qword[3]
+W.qword[2] := __A.qword[2]
+W.qword[1] := __A.qword[1]
+W.qword[0] := __A.qword[0]
+dst.qword[3] := W.qword[3] + s0(W.qword[4])
+dst.qword[2] := W.qword[2] + s0(W.qword[3])
+dst.qword[1] := W.qword[1] + s0(W.qword[2])
+dst.qword[0] := W.qword[0] + s0(W.qword[1])
+
+
+
+ SHA512
+ AVX
+
immintrin.h
+ Cryptography +
+ + + This intrinsic is one of the two SHA512 message scheduling instructions. The intrinsic performs the final calculation for the next four SHA512 message qwords. The calculated results are stored in "dst". 
+
+
+DEFINE ROR64(qword, n) {
+	count := n % 64
+	dest := (qword >> count) | (qword << (64 - count))
+	RETURN dest
+}
+DEFINE SHR64(qword, n) {
+	RETURN qword >> n
+}
+DEFINE s1(qword) {
+	RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6)
+}
+W.qword[14] := __B.qword[2]
+W.qword[15] := __B.qword[3]
+W.qword[16] := __A.qword[0] + s1(W.qword[14])
+W.qword[17] := __A.qword[1] + s1(W.qword[15])
+W.qword[18] := __A.qword[2] + s1(W.qword[16])
+W.qword[19] := __A.qword[3] + s1(W.qword[17])
+dst.qword[3] := W.qword[19]
+dst.qword[2] := W.qword[18]
+dst.qword[1] := W.qword[17]
+dst.qword[0] := W.qword[16]
+
+
+
+ SHA512
+ AVX
+
immintrin.h
+ Cryptography +
+ + + This intrinsic performs two rounds of SHA512 operation using initial SHA512 state (C,D,G,H) from "__A", an initial SHA512 state (A,B,E,F) from "__B", and a pre-computed sum of the next two round message qwords and the corresponding round constants from "__C" (only the two lower qwords of the third operand). The updated SHA512 state (A,B,E,F) is written to "dst", and "dst" can be used as the updated state (C,D,G,H) in later rounds. 
+
+
+DEFINE ROR64(qword, n) {
+	count := n % 64
+	dest := (qword >> count) | (qword << (64 - count))
+	RETURN dest
+}
+DEFINE SHR64(qword, n) {
+	RETURN qword >> n
+}
+DEFINE cap_sigma0(qword) {
+	RETURN ROR64(qword, 28) ^ ROR64(qword, 34) ^ ROR64(qword, 39)
+}
+DEFINE cap_sigma1(qword) {
+	RETURN ROR64(qword, 14) ^ ROR64(qword, 18) ^ ROR64(qword, 41)
+}
+DEFINE MAJ(a,b,c) {
+	RETURN (a & b) ^ (a & c) ^ (b & c)
+}
+DEFINE CH(a,b,c) {
+	RETURN (a & b) ^ (c & ~a)
+}
+A.qword[0] := __B.qword[3]
+B.qword[0] := __B.qword[2]
+C.qword[0] := __A.qword[3]
+D.qword[0] := __A.qword[2]
+E.qword[0] := __B.qword[1]
+F.qword[0] := __B.qword[0]
+G.qword[0] := __A.qword[1]
+H.qword[0] := __A.qword[0]
+WK.qword[0]:= __C.qword[0]
+WK.qword[1]:= __C.qword[1]
+FOR i := 0 to 1
+	A.qword[i+1] := CH(E.qword[i], F.qword[i], G.qword[i]) + cap_sigma1(E.qword[i]) + WK.qword[i] + H.qword[i] + MAJ(A.qword[i], B.qword[i], C.qword[i]) + cap_sigma0(A.qword[i])
+	B.qword[i+1] := A.qword[i]
+	C.qword[i+1] := B.qword[i]
+	D.qword[i+1] := C.qword[i]
+	E.qword[i+1] := CH(E.qword[i], F.qword[i], G.qword[i]) + cap_sigma1(E.qword[i]) + WK.qword[i] + H.qword[i] + D.qword[i]
+	F.qword[i+1] := E.qword[i]
+	G.qword[i+1] := F.qword[i]
+	H.qword[i+1] := G.qword[i]
+ENDFOR
+dst.qword[3] := A.qword[2]
+dst.qword[2] := B.qword[2]
+dst.qword[1] := E.qword[2]
+dst.qword[0] := F.qword[2]
+
+
+
+
+ SHA512
+ AVX
+
immintrin.h
+ Cryptography +
+ + + The VSM3MSG1 intrinsic is one of the two SM3 message scheduling intrinsics. The intrinsic performs an initial calculation for the next four SM3 message words. The calculated results are stored in "dst". + + +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32 - count)) + RETURN dest +} +DEFINE P1(x) { + RETURN x ^ ROL32(x, 15) ^ ROL32(x, 23) +} +W.dword[0] := __C.dword[0] +W.dword[1] := __C.dword[1] +W.dword[2] := __C.dword[2] +W.dword[3] := __C.dword[3] +W.dword[7] := __A.dword[0] +W.dword[8] := __A.dword[1] +W.dword[9] := __A.dword[2] +W.dword[10] := __A.dword[3] +W.dword[13] := __B.dword[0] +W.dword[14] := __B.dword[1] +W.dword[15] := __B.dword[2] +TMP0 := W.dword[7] ^ W.dword[0] ^ ROL32(W.dword[13], 15) +TMP1 := W.dword[8] ^ W.dword[1] ^ ROL32(W.dword[14], 15) +TMP2 := W.dword[9] ^ W.dword[2] ^ ROL32(W.dword[15], 15) +TMP3 := W.dword[10] ^ W.dword[3] +dst.dword[0] := P1(TMP0) +dst.dword[1] := P1(TMP1) +dst.dword[2] := P1(TMP2) +dst.dword[3] := P1(TMP3) + + + + + SM3 + AVX +
immintrin.h
+ Cryptography +
+ + + The VSM3MSG2 intrinsic is one of the two SM3 message scheduling intrinsics. The intrinsic performs the final calculation for the next four SM3 message words. The calculated results are stored in "dst". + + +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32-count)) + RETURN dest +} +WTMP.dword[0] := __A.dword[0] +WTMP.dword[1] := __A.dword[1] +WTMP.dword[2] := __A.dword[2] +WTMP.dword[3] := __A.dword[3] +W.dword[3] := __B.dword[0] +W.dword[4] := __B.dword[1] +W.dword[5] := __B.dword[2] +W.dword[6] := __B.dword[3] +W.dword[10] := __C.dword[0] +W.dword[11] := __C.dword[1] +W.dword[12] := __C.dword[2] +W.dword[13] := __C.dword[3] +W.dword[16] := ROL32(W.dword[3], 7) ^ W.dword[10] ^ WTMP.dword[0] +W.dword[17] := ROL32(W.dword[4], 7) ^ W.dword[11] ^ WTMP.dword[1] +W.dword[18] := ROL32(W.dword[5], 7) ^ W.dword[12] ^ WTMP.dword[2] +W.dword[19] := ROL32(W.dword[6], 7) ^ W.dword[13] ^ WTMP.dword[3] +W.dword[19] := W.dword[19] ^ ROL32(W.dword[16], 6) ^ ROL32(W.dword[16], 15) ^ ROL32(W.dword[16], 30) +dst.dword[0] := W.dword[16] +dst.dword[1] := W.dword[17] +dst.dword[2] := W.dword[18] +dst.dword[3] := W.dword[19] + + + + + SM3 + AVX +
immintrin.h
+ Cryptography +
+ + + The intrinsic performs two rounds of SM3 operation using initial SM3 state (C, D, G, H) from "__A", an initial SM3 state (A, B, E, F) from "__B", and pre-computed words from "__C". "__A", with the initial SM3 state (C, D, G, H), is assumed to hold the non-rotated-left variables from the previous state. The updated SM3 state (A, B, E, F) is written to "__A". The "imm8" should contain the even round number for the first of the two rounds computed by this instruction. The computation masks the "imm8" value by ANDing it with 0x3E so that only even round numbers from 0 through 62 are used for this operation. The calculated results are stored in "dst". 
+
+
+DEFINE ROL32(dword, n) {
+	count := n % 32
+	dest := (dword << count) | (dword >> (32-count))
+	RETURN dest
+}
+DEFINE P0(x) {
+	RETURN x ^ ROL32(x, 9) ^ ROL32(x, 17)
+}
+DEFINE FF(x, y, z, round) {
+	IF round < 16
+		RETURN (x ^ y ^ z)
+	ELSE
+		RETURN (x & y) | (x & z) | (y & z)
+	FI
+}
+DEFINE GG(x, y, z, round){
+	IF round < 16
+		RETURN (x ^ y ^ z)
+	ELSE
+		RETURN (x & y) | (~x & z)
+	FI
+}
+A.dword[0] := __B.dword[3]
+B.dword[0] := __B.dword[2]
+C.dword[0] := __A.dword[3]
+D.dword[0] := __A.dword[2]
+E.dword[0] := __B.dword[1]
+F.dword[0] := __B.dword[0]
+G.dword[0] := __A.dword[1]
+H.dword[0] := __A.dword[0]
+W.dword[0] := __C.dword[0]
+W.dword[1] := __C.dword[1]
+W.dword[4] := __C.dword[2]
+W.dword[5] := __C.dword[3]
+C.dword[0] := ROL32(C.dword[0], 9)
+D.dword[0] := ROL32(D.dword[0], 9)
+G.dword[0] := ROL32(G.dword[0], 19)
+H.dword[0] := ROL32(H.dword[0], 19)
+ROUND := imm8 & 0x3E
+IF ROUND < 16
+	CONST.dword[0] := 0x79CC4519
+ELSE
+	CONST.dword[0] := 0x7A879D8A
+FI
+CONST.dword[0] := ROL32(CONST.dword[0], ROUND)
+FOR i:= 0 to 1
+	temp.dword[0] := ROL32(A.dword[i], 12) + E.dword[i] + CONST.dword[0]
+	S1.dword[0] := ROL32(temp.dword[0], 7)
+	S2.dword[0] := S1.dword[0] ^ ROL32(A.dword[i], 12)
+	T1.dword[0] := FF(A.dword[i], B.dword[i], C.dword[i], ROUND) + D.dword[i] + S2.dword[0] + (W.dword[i] ^ W.dword[i+4])
+	T2.dword[0] := GG(E.dword[i], F.dword[i], G.dword[i], ROUND) + H.dword[i] + S1.dword[0] + W.dword[i]
+	D.dword[i+1] := C.dword[i]
+	C.dword[i+1] := ROL32(B.dword[i], 9)
+	B.dword[i+1] := A.dword[i]
+	A.dword[i+1] := T1.dword[0]
+	H.dword[i+1] := G.dword[i]
+	G.dword[i+1] := ROL32(F.dword[i], 19)
+	F.dword[i+1] := E.dword[i]
+	E.dword[i+1] := P0(T2.dword[0])
+	CONST.dword[0] := ROL32(CONST.dword[0], 1)
+ENDFOR
+dst.dword[3] := A.dword[2]
+dst.dword[2] := B.dword[2]
+dst.dword[1] := E.dword[2]
+dst.dword[0] := F.dword[2]
+
+
+
+
+
+ SM3
+ AVX
+
immintrin.h
+ Cryptography +
+ + + This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in "dst". + + +BYTE sbox[256] = { +0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, +0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, +0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, +0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, +0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, +0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, +0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, +0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, +0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, +0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, +0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, +0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, +0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, +0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, +0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, +0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 +} +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32-count)) + RETURN dest +} +DEFINE SBOX_BYTE(dword, i) { + RETURN sbox[dword.byte[i]] +} +DEFINE lower_t(dword) { + tmp.byte[0] := SBOX_BYTE(dword, 0) + tmp.byte[1] := SBOX_BYTE(dword, 1) + tmp.byte[2] := SBOX_BYTE(dword, 2) + tmp.byte[3] := SBOX_BYTE(dword, 3) + RETURN tmp +} +DEFINE L_KEY(dword) { + RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) +} +DEFINE T_KEY(dword) { + RETURN L_KEY(lower_t(dword)) +} +DEFINE F_KEY(X0, X1, X2, X3, round_key) { + RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) +} +FOR i:= 0 to 1 + P.dword[0] := __A.dword[4*i] + P.dword[1] := __A.dword[4*i+1] + P.dword[2] := __A.dword[4*i+2] + P.dword[3] := __A.dword[4*i+3] + C.dword[0] := F_KEY(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[4*i]) + C.dword[1] := F_KEY(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[4*i+1]) + C.dword[2] := F_KEY(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[4*i+2]) + C.dword[3] := F_KEY(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[4*i+3]) + dst.dword[4*i] := C.dword[0] + dst.dword[4*i+1] := C.dword[1] + dst.dword[4*i+2] := C.dword[2] + dst.dword[4*i+3] := C.dword[3] +ENDFOR +dst[MAX:256] := 0 + + + + SM4 + AVX +
immintrin.h
+ Cryptography +
+ + + This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in "dst". 
+
+	BYTE sbox[256] = {
+0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05,
+0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
+0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6,
+0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8,
+0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
+0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
+0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E,
+0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
+0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3,
+0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F,
+0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
+0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8,
+0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
+0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
+0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48
+}
+DEFINE ROL32(dword, n) {
+	count := n % 32
+	dest := (dword << count) | (dword >> (32-count))
+	RETURN dest
+}
+DEFINE SBOX_BYTE(dword, i) {
+	RETURN sbox[dword.byte[i]]
+}
+DEFINE lower_t(dword) {
+	tmp.byte[0] := SBOX_BYTE(dword, 0)
+	tmp.byte[1] := SBOX_BYTE(dword, 1)
+	tmp.byte[2] := SBOX_BYTE(dword, 2)
+	tmp.byte[3] := SBOX_BYTE(dword, 3)
+	RETURN tmp
+}
+DEFINE L_RND(dword) {
+	tmp := dword
+	tmp := tmp ^ ROL32(dword, 2)
+	tmp := tmp ^ ROL32(dword, 10)
+	tmp := tmp ^ ROL32(dword, 18)
+	tmp := tmp ^ ROL32(dword, 24)
+	RETURN tmp
+}
+DEFINE T_RND(dword) {
+	RETURN L_RND(lower_t(dword))
+}
+DEFINE F_RND(X0, X1, X2, X3, round_key) {
+	RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key)
+}
+FOR i:= 0 to 1
+	P.dword[0] := __A.dword[4*i]
+	P.dword[1] := __A.dword[4*i+1]
+	P.dword[2] := __A.dword[4*i+2]
+	P.dword[3] := __A.dword[4*i+3]
+	C.dword[0] := F_RND(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[4*i])
+	C.dword[1] := F_RND(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[4*i+1])
+	C.dword[2] := F_RND(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[4*i+2])
+	C.dword[3] := F_RND(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[4*i+3])
+	dst.dword[4*i] := C.dword[0]
+	dst.dword[4*i+1] := C.dword[1]
+	dst.dword[4*i+2] := C.dword[2]
+	dst.dword[4*i+3] := C.dword[3]
+ENDFOR
+dst[MAX:256] := 0
+
+
+
+ SM4
+ AVX
+
immintrin.h
+ Cryptography +
+ + + This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in "dst". + + +BYTE sbox[256] = { +0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, +0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, +0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, +0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, +0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, +0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, +0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, +0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, +0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, +0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, +0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, +0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, +0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, +0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, +0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, +0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 +} +DEFINE ROL32(dword, n) { + count := n % 32 + dest := (dword << count) | (dword >> (32-count)) + RETURN dest +} +DEFINE SBOX_BYTE(dword, i) { + RETURN sbox[dword.byte[i]] +} +DEFINE lower_t(dword) { + tmp.byte[0] := SBOX_BYTE(dword, 0) + tmp.byte[1] := SBOX_BYTE(dword, 1) + tmp.byte[2] := SBOX_BYTE(dword, 2) + tmp.byte[3] := SBOX_BYTE(dword, 3) + RETURN tmp +} +DEFINE L_KEY(dword) { + RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) +} +DEFINE T_KEY(dword) { + RETURN L_KEY(lower_t(dword)) +} +DEFINE F_KEY(X0, X1, X2, X3, round_key) { + RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) +} +P.dword[0] := __A.dword[0] +P.dword[1] := __A.dword[1] +P.dword[2] := __A.dword[2] +P.dword[3] := __A.dword[3] +C.dword[0] := F_KEY(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[0]) +C.dword[1] := F_KEY(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[1]) +C.dword[2] := F_KEY(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[2]) +C.dword[3] := F_KEY(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[3]) +dst.dword[0] := C.dword[0] +dst.dword[1] := C.dword[1] +dst.dword[2] := C.dword[2] +dst.dword[3] := C.dword[3] +dst[MAX:128] := 0 + + + + SM4 + AVX +
immintrin.h
+ Cryptography +
+ + + This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in "dst". 
+
+
+BYTE sbox[256] = {
+0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05,
+0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
+0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6,
+0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8,
+0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
+0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
+0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E,
+0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
+0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3,
+0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F,
+0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
+0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8,
+0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
+0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
+0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48
+}
+DEFINE ROL32(dword, n) {
+	count := n % 32
+	dest := (dword << count) | (dword >> (32-count))
+	RETURN dest
+}
+DEFINE SBOX_BYTE(dword, i) {
+	RETURN sbox[dword.byte[i]]
+}
+DEFINE lower_t(dword) {
+	tmp.byte[0] := SBOX_BYTE(dword, 0)
+	tmp.byte[1] := SBOX_BYTE(dword, 1)
+	tmp.byte[2] := SBOX_BYTE(dword, 2)
+	tmp.byte[3] := SBOX_BYTE(dword, 3)
+	RETURN tmp
+}
+DEFINE L_RND(dword) {
+	tmp := dword
+	tmp := tmp ^ ROL32(dword, 2)
+	tmp := tmp ^ ROL32(dword, 10)
+	tmp := tmp ^ ROL32(dword, 18)
+	tmp := tmp ^ ROL32(dword, 24)
+	RETURN tmp
+}
+DEFINE T_RND(dword) {
+	RETURN L_RND(lower_t(dword))
+}
+DEFINE F_RND(X0, X1, X2, X3, round_key) {
+	RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key)
+}
+P.dword[0] := __A.dword[0]
+P.dword[1] := __A.dword[1]
+P.dword[2] := __A.dword[2]
+P.dword[3] := __A.dword[3]
+C.dword[0] := F_RND(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[0])
+C.dword[1] := F_RND(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[1])
+C.dword[2] := F_RND(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[2])
+C.dword[3] := F_RND(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[3])
+dst.dword[0] := C.dword[0]
+dst.dword[1] := C.dword[1]
+dst.dword[2] := C.dword[2]
+dst.dword[3] := C.dword[3]
+dst[MAX:128] := 0
+
+
+
+ SM4
+ AVX
+
immintrin.h
+ Cryptography +
+ + + + Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ACOS(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ACOS(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". 
+
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ACOSH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". 
+
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ACOSH(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ASIN(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ASIN(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". 
+
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ASINH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". 
+
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ASINH(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ATAN(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. 
+
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ATAN(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + + Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". 
+
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ATANH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". 
+
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ATANH(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := COSD(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := COSD(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". 
+
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := COSH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". 
+
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := COSH(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". 
+
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0))
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + + Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". 
+
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0))
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SIN(a[i+63:i]) + MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + + Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SIN(a[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
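+ A C sketch of the packed sincos above (illustrative; these SVML intrinsics are supplied by the compiler's vector math library, e.g. ICC/ICX and MSVC, rather than by a single instruction):
+
+#include <immintrin.h>
+
+void demo_sincos(float angle, float *s, float *c) {
+    __m128 vc;
+    __m128 vs = _mm_sincos_ps(&vc, _mm_set1_ps(angle)); /* sine returned, cosine stored */
+    *s = _mm_cvtss_f32(vs); /* lane 0 of the sines */
+    *c = _mm_cvtss_f32(vc); /* matching cosine */
+}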
+ + + + Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SIND(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SIND(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". 
+
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SINH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+
+ SSE
+
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SINH(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := TAN(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := TAN(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := TAND(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := TAND(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := TANH(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := TANH(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Trigonometry +
+ + + + Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CubeRoot(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := CubeRoot(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CEXP(a[31:0], b[31:0]) { + result[31:0] := POW(FP32(e), a[31:0]) * COS(b[31:0]) + result[63:32] := POW(FP32(e), a[31:0]) * SIN(b[31:0]) + RETURN result +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CEXP(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CLOG(a[31:0], b[31:0]) { + result[31:0] := LOG(SQRT(POW(a, 2.0) + POW(b, 2.0))) + result[63:32] := ATAN2(b, a) + RETURN result +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CLOG(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". + +DEFINE CSQRT(a[31:0], b[31:0]) { + sign[31:0] := (b < 0.0) ? -FP32(1.0) : FP32(1.0) + result[31:0] := SQRT((a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0) + result[63:32] := sign * SQRT((-a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0) + RETURN result +} +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CSQRT(a[i+31:i], a[i+63:i+32]) +ENDFOR +dst[MAX:128] := 0 + + SSE + 
immintrin.h
+ Elementary Math Functions +
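The three complex entries above share one layout: each complex value occupies two adjacent 32-bit lanes as (real, imaginary), so a __m128 carries two complex numbers. A minimal sketch of that packing, assuming the SVML name _mm_csqrt_ps:

/* Sketch: two complex square roots per call (SVML name assumed). */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* lanes low to high: re0, im0, re1, im1 -> -4+0i and 0+2i */
    __m128 z = _mm_setr_ps(-4.0f, 0.0f, 0.0f, 2.0f);
    __m128 r = _mm_csqrt_ps(z);

    float out[4];
    _mm_storeu_ps(out, r);
    printf("sqrt(-4+0i) = %g%+gi\n", out[0], out[1]); /* expect 0+2i */
    printf("sqrt(0+2i)  = %g%+gi\n", out[2], out[3]); /* expect 1+1i */
    return 0;
}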
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POW(10.0, a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POW(FP32(10.0), a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POW(2.0, a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POW(FP32(2.0), a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POW(e, a[i+63:i]) - 1.0 +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0 +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := InvCubeRoot(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := InvCubeRoot(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := InvSQRT(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := InvSQRT(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LOG(1.0 + a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LOG(1.0 + a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ConvertExpFP64(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ConvertExpFP32(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := POW(a[i+63:i], b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + + Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := POW(a[i+31:i], b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
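The packed power entries above compute POW(a, b) lane by lane; a minimal sketch, assuming the SVML name _mm_pow_ps:

/* Sketch: per-lane power function (SVML assumed). */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 base = _mm_setr_ps(2.0f, 3.0f, 4.0f, 10.0f);
    __m128 exp  = _mm_setr_ps(8.0f, 2.0f, 0.5f, -1.0f);
    __m128 p    = _mm_pow_ps(base, exp);   /* per lane: POW(a, b) */

    float out[4];
    _mm_storeu_ps(out, p);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 256 9 2 0.1 */
    return 0;
}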
+ + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_pd". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SQRT(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_ps". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SQRT(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Elementary Math Functions +
+ + + + Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := CDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := InverseCDFNormal(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := InverseCDFNormal(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ERF(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := 1.0 - ERF(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := 1.0 - ERF(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE + 
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i])) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i])) +ENDFOR +dst[MAX:128] := 0 + + SSE + 
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := 1.0 / ERF(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Probability/Statistics +
+ + + + Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := 1.0 / ERF(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE + 
immintrin.h
+ Probability/Statistics +
+ + + + + Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 3 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 1 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 15 + i := 8*j + IF b[i+7:i] == 0 + #DE + FI + dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 7 + i := 16*j + IF b[i+15:i] == 0 + #DE + FI + dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 3 + i := 32*j + IF b[i+31:i] == 0 + #DE + FI + dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + +FOR j := 0 to 1 + i := 64*j + IF b[i+63:i] == 0 + #DE + FI + dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
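The eight division entries above raise #DE on a zero divisor, so the caller must guarantee "b" has no zero lanes. A minimal sketch of the signed 32-bit case, assuming the SVML name _mm_div_epi32:

/* Sketch: truncating packed division (SVML name assumed). */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi32(1, 100, 7, -7);
    __m128i b = _mm_setr_epi32(1,   3, 2,  2);   /* no zero lanes: avoids #DE */
    __m128i q = _mm_div_epi32(a, b);             /* truncated: 1, 33, 3, -3 */

    int out[4];
    _mm_storeu_si128((__m128i *)out, q);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
    return 0;
}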
+ + + + Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ERF(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed 32-bit integers into memory at "mem_addr". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
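The combined quotient/remainder entry above returns the quotients and writes the remainders to memory. A minimal sketch, assuming the SVML signature _mm_idivrem_epi32(__m128i *rem, __m128i a, __m128i b):

/* Sketch: one call yields both quotient and remainder (SVML assumed). */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi32(5, 7, 9, 17);
    __m128i b = _mm_setr_epi32(2, 3, 4,  5);
    __m128i rem;
    __m128i quot = _mm_idivrem_epi32(&rem, a, b); /* dst=quotients, mem=remainders */

    int q[4], r[4];
    _mm_storeu_si128((__m128i *)q, quot);
    _mm_storeu_si128((__m128i *)r, rem);
    for (int i = 0; i < 4; i++)
        printf("lane %d: q=%d r=%d\n", i, q[i], r[i]);
    return 0;
}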
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 8-bit integers in "dst". + FOR j := 0 to 15 + i := 8*j + dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE + 
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 16-bit integers in "dst". + FOR j := 0 to 7 + i := 16*j + dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE + 
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 64-bit integers in "dst". + FOR j := 0 to 1 + i := 64*j + dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE + 
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 8-bit integers in "dst". + FOR j := 0 to 15 + i := 8*j + dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE + 
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 16-bit integers in "dst". + FOR j := 0 to 7 + i := 16*j + dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE + 
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 64-bit integers in "dst". + FOR j := 0 to 1 + i := 64*j + dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE + 
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed unsigned 32-bit integers into memory at "mem_addr". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) + MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + + Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". + FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Arithmetic +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CEIL(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := CEIL(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := FLOOR(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := FLOOR(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ROUND(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ROUND(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Special Math Functions +
+ + + + Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := TRUNCATE(a[i+63:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Miscellaneous +
+ + + + Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := TRUNCATE(a[i+31:i]) +ENDFOR +dst[MAX:128] := 0 + + SSE +
immintrin.h
+ Miscellaneous +
+ + + + + + + + + Macro: Transpose the 4x4 matrix formed by the 4 rows of single-precision (32-bit) floating-point elements in "row0", "row1", "row2", and "row3", and store the transposed matrix in these vectors ("row0" now contains column 0, etc.). + +__m128 tmp3, tmp2, tmp1, tmp0; +tmp0 := _mm_unpacklo_ps(row0, row1); +tmp2 := _mm_unpacklo_ps(row2, row3); +tmp1 := _mm_unpackhi_ps(row0, row1); +tmp3 := _mm_unpackhi_ps(row2, row3); +row0 := _mm_movelh_ps(tmp0, tmp2); +row1 := _mm_movehl_ps(tmp2, tmp0); +row2 := _mm_movelh_ps(tmp1, tmp3); +row3 := _mm_movehl_ps(tmp3, tmp1); + + SSE +
xmmintrin.h
+ Swizzle +
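The transpose entry above is exposed by xmmintrin.h as the macro _MM_TRANSPOSE4_PS, which rewrites its four row arguments in place; a minimal sketch:

/* Sketch: in-place 4x4 transpose via the standard macro. */
#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    __m128 r0 = _mm_setr_ps( 1.0f,  2.0f,  3.0f,  4.0f);
    __m128 r1 = _mm_setr_ps( 5.0f,  6.0f,  7.0f,  8.0f);
    __m128 r2 = _mm_setr_ps( 9.0f, 10.0f, 11.0f, 12.0f);
    __m128 r3 = _mm_setr_ps(13.0f, 14.0f, 15.0f, 16.0f);

    _MM_TRANSPOSE4_PS(r0, r1, r2, r3);   /* r0 now holds column 0, etc. */

    float out[4];
    _mm_storeu_ps(out, r0);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 5 9 13 */
    return 0;
}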
+ + + + + Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". + +dst[15:0] := (a[63:0] >> (imm8[1:0] * 16))[15:0] +dst[31:16] := 0 + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". + +dst[15:0] := (a[63:0] >> (imm8[1:0] * 16))[15:0] +dst[31:16] := 0 + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8". + +dst[63:0] := a[63:0] +sel := imm8[1:0]*16 +dst[sel+15:sel] := i[15:0] + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8". + +dst[63:0] := a[63:0] +sel := imm8[1:0]*16 +dst[sel+15:sel] := i[15:0] + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + Shuffle 16-bit integers in "a" using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[15:0] := src[15:0] + 1: tmp[15:0] := src[31:16] + 2: tmp[15:0] := src[47:32] + 3: tmp[15:0] := src[63:48] + ESAC + RETURN tmp[15:0] +} +dst[15:0] := SELECT4(a[63:0], imm8[1:0]) +dst[31:16] := SELECT4(a[63:0], imm8[3:2]) +dst[47:32] := SELECT4(a[63:0], imm8[5:4]) +dst[63:48] := SELECT4(a[63:0], imm8[7:6]) + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + Shuffle 16-bit integers in "a" using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[15:0] := src[15:0] + 1: tmp[15:0] := src[31:16] + 2: tmp[15:0] := src[47:32] + 3: tmp[15:0] := src[63:48] + ESAC + RETURN tmp[15:0] +} +dst[15:0] := SELECT4(a[63:0], imm8[1:0]) +dst[31:16] := SELECT4(a[63:0], imm8[3:2]) +dst[47:32] := SELECT4(a[63:0], imm8[5:4]) +dst[63:48] := SELECT4(a[63:0], imm8[7:6]) + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + + + Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(b[127:0], imm8[5:4]) +dst[127:96] := SELECT4(b[127:0], imm8[7:6]) + + + SSE + 
xmmintrin.h
+ Swizzle +
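Per the pseudocode above, the low two result lanes come from "a" and the high two from "b", each picked by a 2-bit field of imm8. The _MM_SHUFFLE helper macro builds the control byte; a minimal sketch with _mm_shuffle_ps:

/* Sketch: two-operand lane shuffle with the _MM_SHUFFLE control macro. */
#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f);
    __m128 b = _mm_setr_ps(4.0f, 5.0f, 6.0f, 7.0f);

    /* _MM_SHUFFLE(z, y, x, w) yields dst = { a[w], a[x], b[y], b[z] } */
    __m128 s = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0));

    float out[4];
    _mm_storeu_ps(out, s);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 1 6 7 */
    return 0;
}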
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) + + + SSE + 
xmmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave single-precision (32-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) + + + SSE +
xmmintrin.h
+ Swizzle +
+ + + + Get the unsigned 32-bit value of the MXCSR control and status register. + dst[31:0] := MXCSR + + + SSE +
immintrin.h
+ General Support +
+ + + + Set the MXCSR control and status register with the value in unsigned 32-bit integer "a". + +MXCSR := a[31:0] + + + SSE +
immintrin.h
+ General Support +
+ + + Macro: Get the exception state bits from the MXCSR control and status register. The exception state may contain any of the following flags: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT + dst[31:0] := MXCSR & _MM_EXCEPT_MASK + + SSE +
immintrin.h
+ General Support +
+ + + + Macro: Set the exception state bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The exception state may contain any of the following flags: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT + MXCSR := a[31:0] AND ~_MM_EXCEPT_MASK + + SSE +
immintrin.h
+ General Support +
+ + + Macro: Get the exception mask bits from the MXCSR control and status register. The exception mask may contain any of the following flags: _MM_MASK_INVALID, _MM_MASK_DIV_ZERO, _MM_MASK_DENORM, _MM_MASK_OVERFLOW, _MM_MASK_UNDERFLOW, _MM_MASK_INEXACT + dst[31:0] := MXCSR & _MM_MASK_MASK + + SSE +
immintrin.h
+ General Support +
+ + + + Macro: Set the exception mask bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The exception mask may contain any of the following flags: _MM_MASK_INVALID, _MM_MASK_DIV_ZERO, _MM_MASK_DENORM, _MM_MASK_OVERFLOW, _MM_MASK_UNDERFLOW, _MM_MASK_INEXACT + MXCSR := a[31:0] AND ~_MM_MASK_MASK + + SSE +
immintrin.h
+ General Support +
+ + + Macro: Get the rounding mode bits from the MXCSR control and status register. The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO + dst[31:0] := MXCSR & _MM_ROUND_MASK + + SSE +
immintrin.h
+ General Support +
+ + + + Macro: Set the rounding mode bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO + MXCSR := a[31:0] AND ~_MM_ROUND_MASK + + SSE +
immintrin.h
+ General Support +
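The rounding-mode macros above read and write the MXCSR field that conversion intrinsics honour. A minimal sketch that switches to round-toward-zero and restores the caller's mode:

/* Sketch: save, change, use, and restore the MXCSR rounding mode. */
#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    unsigned int old_mode = _MM_GET_ROUNDING_MODE();

    _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
    /* _mm_cvtss_si32 honours MXCSR: 1.7f now converts to 1, not 2 */
    int t = _mm_cvtss_si32(_mm_set_ss(1.7f));
    printf("truncated: %d\n", t);

    _MM_SET_ROUNDING_MODE(old_mode);   /* restore the caller's mode */
    return 0;
}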
+ + + Macro: Get the flush zero bits from the MXCSR control and status register. The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF + dst[31:0] := MXCSR & _MM_FLUSH_MASK + + SSE +
immintrin.h
+ General Support +
+ + + + Macro: Set the flush zero bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF + MXCSR := a[31:0] AND ~_MM_FLUSH_MASK + + SSE +
immintrin.h
+ General Support +
+ + + + + Fetch the line of data from memory that contains address "p" to a location in the cache hierarchy specified by the locality hint "i", which can be one of:
+ _MM_HINT_T0 // 3, move data using the T0 hint. The PREFETCHT0 instruction will be generated.
+ _MM_HINT_T1 // 2, move data using the T1 hint. The PREFETCHT1 instruction will be generated.
+ _MM_HINT_T2 // 1, move data using the T2 hint. The PREFETCHT2 instruction will be generated.
+ _MM_HINT_NTA // 0, move data using the non-temporal access (NTA) hint. The PREFETCHNTA instruction will be generated.
+ + + + + SSE + 
immintrin.h
+ General Support +
+ + + + Perform a serializing operation on all store-to-memory instructions that were issued prior to this instruction. Guarantees that every store instruction that precedes the fence in program order is globally visible before any store instruction that follows the fence in program order. + + SSE + 
immintrin.h
+ General Support +
+ + + + + Allocate "size" bytes of memory, aligned to the alignment specified in "align", and return a pointer to the allocated memory. "_mm_free" should be used to free memory that is allocated with "_mm_malloc". + SSE +
immintrin.h
+ General Support +
+ + + + Free aligned memory that was allocated with "_mm_malloc". + SSE +
immintrin.h
+ General Support +
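The allocation pair above must be used together: memory from _mm_malloc is released with _mm_free, never with free(). A minimal sketch:

/* Sketch: 16-byte aligned buffer suitable for aligned SSE load/store. */
#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    float *buf = (float *)_mm_malloc(4 * sizeof(float), 16);
    if (!buf) return 1;

    _mm_store_ps(buf, _mm_set1_ps(2.5f));   /* aligned store is safe here */
    printf("%g %g %g %g\n", buf[0], buf[1], buf[2], buf[3]);

    _mm_free(buf);                          /* pairs with _mm_malloc */
    return 0;
}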
+ + + + Return vector of type __m128 with undefined elements. + SSE +
immintrin.h
+ General Support +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper element of "dst". [min_float_note] + +dst[31:0] := MIN(a[31:0], b[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper element of "dst". [max_float_note] + +dst[31:0] := MAX(a[31:0], b[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Special Math Functions +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Special Math Functions +
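The packed min/max entries above compose into a branch-free clamp, a common idiom; a minimal sketch with _mm_min_ps and _mm_max_ps:

/* Sketch: clamp each lane to [0, 1] without branches. */
#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    __m128 x  = _mm_setr_ps(-2.0f, 0.25f, 0.5f, 3.0f);
    __m128 lo = _mm_set1_ps(0.0f);
    __m128 hi = _mm_set1_ps(1.0f);

    __m128 clamped = _mm_min_ps(_mm_max_ps(x, lo), hi);

    float out[4];
    _mm_storeu_ps(out, clamped);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 0.25 0.5 1 */
    return 0;
}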
+ + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + Miscellaneous + + + + Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum the 8 differences to produce a single unsigned 16-bit integer, and store it in the low 16 bits of "dst". + +FOR j := 0 to 7 + i := j*8 + tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) +ENDFOR +dst[15:0] := tmp[7:0] + tmp[15:8] + tmp[23:16] + tmp[31:24] + tmp[39:32] + tmp[47:40] + tmp[55:48] + tmp[63:56] +dst[63:16] := 0 + + + SSE + 
xmmintrin.h
+ Arithmetic +
+ + Miscellaneous + + + + Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum the 8 differences to produce a single unsigned 16-bit integer, and store it in the low 16 bits of "dst". + +FOR j := 0 to 7 + i := j*8 + tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) +ENDFOR +dst[15:0] := tmp[7:0] + tmp[15:8] + tmp[23:16] + tmp[31:24] + tmp[39:32] + tmp[47:40] + tmp[55:48] + tmp[63:56] +dst[63:16] := 0 + + + SSE + 
xmmintrin.h
+ Arithmetic +
+ + + + + Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := a[31:0] + b[31:0] +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := a[31:0] - b[31:0] +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := a[31:0] * b[31:0] +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] * b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := a[31:0] / b[31:0] +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := a[i+31:i] / b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Arithmetic +
+ + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +ENDFOR + + + SSE +
xmmintrin.h
+ Probability/Statistics +
+ + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +ENDFOR + + + SSE +
xmmintrin.h
+ Probability/Statistics +
+ + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +ENDFOR + + + SSE +
xmmintrin.h
+ Probability/Statistics +
+ + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +ENDFOR + + + SSE +
xmmintrin.h
+ Probability/Statistics +
+ + + + + Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Convert packed 32-bit integers in "b" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", and copy the upper 2 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[63:32] := Convert_Int32_To_FP32(b[63:32]) +dst[95:64] := a[95:64] +dst[127:96] := a[127:96] + + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "b" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", and copy the upper 2 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +dst[63:32] := Convert_Int32_To_FP32(b[63:32]) +dst[95:64] := a[95:64] +dst[127:96] := a[127:96] + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed 16-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + m := j*32 + dst[m+31:m] := Convert_Int16_To_FP32(a[i+15:i]) +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed unsigned 16-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*16 + m := j*32 + dst[m+31:m] := Convert_Int16_To_FP32(a[i+15:i]) +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower packed 8-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*8 + m := j*32 + dst[m+31:m] := Convert_Int8_To_FP32(a[i+7:i]) +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower packed unsigned 8-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := j*8 + m := j*32 + dst[m+31:m] := Convert_Int8_To_FP32(a[i+7:i]) +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", then convert the packed signed 32-bit integers in "b" to packed single-precision (32-bit) floating-point elements, and store the results in the upper 2 elements of "dst". + +dst[31:0] := Convert_Int32_To_FP32(a[31:0]) +dst[63:32] := Convert_Int32_To_FP32(a[63:32]) +dst[95:64] := Convert_Int32_To_FP32(b[31:0]) +dst[127:96] := Convert_Int32_To_FP32(b[63:32]) + + SSE + 
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_Int64(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Copy the lower single-precision (32-bit) floating-point element of "a" to "dst". + +dst[31:0] := a[31:0] + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". Note: this intrinsic will generate 0x7FFF, rather than 0x8000, for input values between 0x7FFF and 0x7FFFFFFF. + +FOR j := 0 to 3 + i := 16*j + k := 32*j + IF a[k+31:k] >= FP32(0x7FFF) && a[k+31:k] <= FP32(0x7FFFFFFF) + dst[i+15:i] := 0x7FFF + ELSE + dst[i+15:i] := Convert_FP32_To_Int16(a[k+31:k]) + FI +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 8-bit integers, and store the results in lower 4 elements of "dst". Note: this intrinsic will generate 0x7F, rather than 0x80, for input values between 0x7F and 0x7FFFFFFF. + +FOR j := 0 to 3 + i := 8*j + k := 32*j + IF a[k+31:k] >= FP32(0x7F) && a[k+31:k] <= FP32(0x7FFFFFFF) + dst[i+7:i] := 0x7F + ELSE + dst[i+7:i] := Convert_FP32_To_Int8(a[k+31:k]) + FI +ENDFOR + + SSE +
xmmintrin.h
+ Convert +
+ + + + + Store 64-bits of integer data from "a" into memory using a non-temporal memory hint. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE +
immintrin.h
+ Store +
+ + + + + + Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint. + +FOR j := 0 to 7 + i := j*8 + IF mask[i+7] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + SSE +
immintrin.h
+ Store +
+ + + + + + Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). + +FOR j := 0 to 7 + i := j*8 + IF mask[i+7] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + SSE +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE +
immintrin.h
+ Store +
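Streaming stores like the entry above bypass the cache, so they should be published with the store fence documented earlier before another thread reads the buffer. A minimal sketch combining _mm_stream_ps and _mm_sfence:

/* Sketch: fill a buffer with non-temporal stores, then fence. */
#include <xmmintrin.h>

void fill_nocache(float *dst16, float v, int n_vecs) {
    /* dst16 must be 16-byte aligned; n_vecs counts __m128 chunks */
    __m128 x = _mm_set1_ps(v);
    for (int i = 0; i < n_vecs * 4; i += 4)
        _mm_stream_ps(dst16 + i, x);   /* non-temporal, cache-bypassing */
    _mm_sfence();                      /* order the streaming stores */
}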
+ + + + + Store the upper 2 single-precision (32-bit) floating-point elements from "a" into memory. + +MEM[mem_addr+31:mem_addr] := a[95:64] +MEM[mem_addr+63:mem_addr+32] := a[127:96] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store the lower 2 single-precision (32-bit) floating-point elements from "a" into memory. + +MEM[mem_addr+31:mem_addr] := a[31:0] +MEM[mem_addr+63:mem_addr+32] := a[63:32] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store the lower single-precision (32-bit) floating-point element from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+31:mem_addr] := a[31:0] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store the lower single-precision (32-bit) floating-point element from "a" into 4 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+31:mem_addr] := a[31:0] +MEM[mem_addr+63:mem_addr+32] := a[31:0] +MEM[mem_addr+95:mem_addr+64] := a[31:0] +MEM[mem_addr+127:mem_addr+96] := a[31:0] + + SSE +
immintrin.h
+ Store +
+ + + + + Store the lower single-precision (32-bit) floating-point element from "a" into 4 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+31:mem_addr] := a[31:0] +MEM[mem_addr+63:mem_addr+32] := a[31:0] +MEM[mem_addr+95:mem_addr+64] := a[31:0] +MEM[mem_addr+127:mem_addr+96] := a[31:0] + + SSE +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE +
immintrin.h
+ Store +
+ + + + + Store 4 single-precision (32-bit) floating-point elements from "a" into memory in reverse order. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+31:mem_addr] := a[127:96] +MEM[mem_addr+63:mem_addr+32] := a[95:64] +MEM[mem_addr+95:mem_addr+64] := a[63:32] +MEM[mem_addr+127:mem_addr+96] := a[31:0] + + + SSE +
immintrin.h
+ Store +
+ + + + Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[j] := a[i+7] +ENDFOR +dst[MAX:8] := 0 + + + SSE +
xmmintrin.h
+ Miscellaneous +
+ + + + Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[j] := a[i+7] +ENDFOR +dst[MAX:8] := 0 + + + SSE +
xmmintrin.h
+ Miscellaneous +
+ + + + Set each bit of mask "dst" based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in "a". + +FOR j := 0 to 3 + i := j*32 + IF a[i+31] + dst[j] := 1 + ELSE + dst[j] := 0 + FI +ENDFOR +dst[MAX:4] := 0 + + + SSE +
xmmintrin.h
+ Miscellaneous +
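The movemask entry above compresses a packed compare into a small integer that scalar code can branch on; a minimal sketch with _mm_movemask_ps:

/* Sketch: detect negative lanes via sign-bit extraction. */
#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_setr_ps(-1.0f, 2.0f, -3.0f, 4.0f);
    __m128 neg = _mm_cmplt_ps(a, _mm_setzero_ps()); /* all-ones where a < 0 */

    int mask = _mm_movemask_ps(neg);   /* bit j = sign bit of lane j */
    printf("mask = 0x%x\n", mask);     /* 0x5: lanes 0 and 2 are negative */
    return 0;
}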
+ + + + Compute the square root of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := SQRT(a[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SQRT(a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +dst[31:0] := (1.0 / a[31:0]) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (1.0 / a[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +dst[31:0] := (1.0 / SQRT(a[31:0])) +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
+ + + + Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) +ENDFOR + + + SSE +
xmmintrin.h
+ Elementary Math Functions +
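The approximate reciprocal square root above is accurate to about 12 bits; a common refinement, one Newton-Raphson step y1 = y0 * (1.5 - 0.5 * x * y0 * y0), roughly doubles that. A minimal sketch:

/* Sketch: _mm_rsqrt_ps plus one Newton-Raphson refinement step. */
#include <xmmintrin.h>
#include <stdio.h>

static __m128 rsqrt_nr(__m128 x) {
    __m128 y  = _mm_rsqrt_ps(x);                       /* initial estimate */
    __m128 t  = _mm_mul_ps(_mm_mul_ps(_mm_set1_ps(0.5f), x),
                           _mm_mul_ps(y, y));          /* 0.5 * x * y * y  */
    return _mm_mul_ps(y, _mm_sub_ps(_mm_set1_ps(1.5f), t));
}

int main(void) {
    float out[4];
    _mm_storeu_ps(out, rsqrt_nr(_mm_setr_ps(1.0f, 4.0f, 16.0f, 64.0f)));
    printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]); /* ~1 .5 .25 .125 */
    return 0;
}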
+ + + + + Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) +ENDFOR + + + SSE +
xmmintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] OR b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] XOR b[i+31:i] +ENDFOR + + + SSE +
xmmintrin.h
+ Logical +
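Since these logical ops act on the raw bit patterns, they give branch-free sign manipulation. A sketch assuming the AND-NOT and XOR entries are `_mm_andnot_ps` and `_mm_xor_ps`:

```
#include <xmmintrin.h>

/* |x|: clear each lane's sign bit. -0.0f is 0x80000000, so ANDNOT with
   it keeps the other 31 bits. */
static inline __m128 abs_ps(__m128 x) {
    return _mm_andnot_ps(_mm_set1_ps(-0.0f), x);
}

/* -x: flip each lane's sign bit. */
static inline __m128 neg_ps(__m128 x) {
    return _mm_xor_ps(x, _mm_set1_ps(-0.0f));
}
```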
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for equality, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] == b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for less-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] < b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] < b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
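The all-ones/all-zeros lane masks produced by these compares are meant to be combined with the logical ops for branch-free selection. An illustrative sketch (assuming `_mm_cmplt_ps`; SSE also has a dedicated packed-min instruction, so this is for exposition only):

```
#include <xmmintrin.h>

/* Per-lane minimum: take a where a < b, b elsewhere. */
static inline __m128 select_min_ps(__m128 a, __m128 b) {
    __m128 mask = _mm_cmplt_ps(a, b);     /* 0xFFFFFFFF where a < b */
    return _mm_or_ps(_mm_and_ps(mask, a),
                     _mm_andnot_ps(mask, b));
}
```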
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] <= b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] <= b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for greater-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] > b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for greater-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] >= b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] >= b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := ( a[31:0] != b[31:0] ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] != b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (!( a[31:0] < b[31:0] )) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := !( a[i+31:i] < b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (!( a[31:0] <= b[31:0] )) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (!( a[i+31:i] <= b[i+31:i] )) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (!( a[31:0] > b[31:0] )) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (!( a[i+31:i] > b[i+31:i] )) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := (!( a[31:0] >= b[31:0] )) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := (!( a[i+31:i] >= b[i+31:i] )) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + dst[31:0] := ( a[31:0] != NaN AND b[31:0] != NaN ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] != NaN AND b[i+31:i] != NaN ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + dst[31:0] := ( a[31:0] == NaN OR b[31:0] == NaN ) ? 0xFFFFFFFF : 0 +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in "dst". + FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == NaN OR b[i+31:i] == NaN ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] == b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] < b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] <= b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] > b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] >= b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). + RETURN ( a[31:0] == NaN OR b[31:0] == NaN OR a[31:0] != b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] == b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] < b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] <= b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] > b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] >= b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
+ + + + + Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[31:0] == NaN OR b[31:0] == NaN OR a[31:0] != b[31:0] ) ? 1 : 0 + + + SSE +
xmmintrin.h
+ Compare +
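Unlike the packed compares, these scalar compares return a plain int, so they feed ordinary control flow directly. A sketch assuming the less-than variant is `_mm_comilt_ss`:

```
#include <xmmintrin.h>

/* 1 iff neither lower lane is NaN and a0 < b0; usable directly in an if. */
int lower_lt(__m128 a, __m128 b) {
    return _mm_comilt_ss(a, b);
}
```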
+ + + + Copy single-precision (32-bit) floating-point element "a" to the lower element of "dst", and zero the upper 3 elements. + +dst[31:0] := a[31:0] +dst[127:32] := 0 + + SSE +
xmmintrin.h
+ Set +
+ + + + Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR + + SSE +
xmmintrin.h
+ Set +
+ + + + Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR + + SSE +
xmmintrin.h
+ Set +
+ + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 + + SSE +
xmmintrin.h
+ Set +
+ + + + + + + Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[31:0] := e3 +dst[63:32] := e2 +dst[95:64] := e1 +dst[127:96] := e0 + + SSE +
xmmintrin.h
+ Set +
+ + + + Return vector of type __m128 with all elements set to zero. + +dst[MAX:0] := 0 + + + SSE +
xmmintrin.h
+ Set +
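The plain and reversed set forms differ only in argument order, which is a frequent source of bugs. A sketch assuming they are `_mm_set_ps` and `_mm_setr_ps`:

```
#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
    float out[4];
    __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  /* args listed e3..e0 */
    __m128 b = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f); /* args in memory order */
    _mm_storeu_ps(out, a);
    printf("set:  %g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 2 3 4 */
    _mm_storeu_ps(out, b);
    printf("setr: %g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 2 3 4 */
    return 0;
}
```

Both calls build the same vector; the point is that the argument lists are mirror images.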
+ + + + + Load 2 single-precision (32-bit) floating-point elements from memory into the upper 2 elements of "dst", and copy the lower 2 elements from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary. + +dst[31:0] := a[31:0] +dst[63:32] := a[63:32] +dst[95:64] := MEM[mem_addr+31:mem_addr] +dst[127:96] := MEM[mem_addr+63:mem_addr+32] + + + SSE +
immintrin.h
+ Load +
+ + + + + Load 2 single-precision (32-bit) floating-point elements from memory into the lower 2 elements of "dst", and copy the upper 2 elements from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary. + +dst[31:0] := MEM[mem_addr+31:mem_addr] +dst[63:32] := MEM[mem_addr+63:mem_addr+32] +dst[95:64] := a[95:64] +dst[127:96] := a[127:96] + + + SSE +
immintrin.h
+ Load +
+ + + + Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst", and zero the upper 3 elements. "mem_addr" does not need to be aligned on any particular boundary. +
+dst[31:0] := MEM[mem_addr+31:mem_addr]
+dst[127:32] := 0
+ + + SSE +

immintrin.h
+ Load +
+ + + + Load a single-precision (32-bit) floating-point element from memory into all elements of "dst". + +dst[31:0] := MEM[mem_addr+31:mem_addr] +dst[63:32] := MEM[mem_addr+31:mem_addr] +dst[95:64] := MEM[mem_addr+31:mem_addr] +dst[127:96] := MEM[mem_addr+31:mem_addr] + + SSE +
immintrin.h
+ Load +
+ + + + Load a single-precision (32-bit) floating-point element from memory into all elements of "dst". + +dst[31:0] := MEM[mem_addr+31:mem_addr] +dst[63:32] := MEM[mem_addr+31:mem_addr] +dst[95:64] := MEM[mem_addr+31:mem_addr] +dst[127:96] := MEM[mem_addr+31:mem_addr] + + SSE +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE +
immintrin.h
+ Load +
+ + + + Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE +
immintrin.h
+ Load +
+ + + + Load 4 single-precision (32-bit) floating-point elements from memory into "dst" in reverse order. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. +
+dst[31:0] := MEM[mem_addr+127:mem_addr+96]
+dst[63:32] := MEM[mem_addr+95:mem_addr+64]
+dst[95:64] := MEM[mem_addr+63:mem_addr+32]
+dst[127:96] := MEM[mem_addr+31:mem_addr]
+ + SSE +
immintrin.h
+ Load +
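A sketch of the aligned/unaligned pair above, assuming they are `_mm_load_ps` and `_mm_loadu_ps`. The aligned form may fault on a misaligned pointer; the unaligned form accepts any address:

```
#include <xmmintrin.h>

_Alignas(16) static float buf[8] = {0, 1, 2, 3, 4, 5, 6, 7};

__m128 aligned_load(void)   { return _mm_load_ps(buf);      } /* buf is 16-byte aligned */
__m128 unaligned_load(void) { return _mm_loadu_ps(buf + 1); } /* odd offset is fine     */
```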
+ + + + + Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := b[31:0] +dst[127:32] := a[127:32] + + + SSE +
xmmintrin.h
+ Move +
+ + + + + Move the upper 2 single-precision (32-bit) floating-point elements from "b" to the lower 2 elements of "dst", and copy the upper 2 elements from "a" to the upper 2 elements of "dst". + +dst[31:0] := b[95:64] +dst[63:32] := b[127:96] +dst[95:64] := a[95:64] +dst[127:96] := a[127:96] + + + SSE +
xmmintrin.h
+ Move +
+ + + + + Move the lower 2 single-precision (32-bit) floating-point elements from "b" to the upper 2 elements of "dst", and copy the lower 2 elements from "a" to the lower 2 elements of "dst". + +dst[31:0] := a[31:0] +dst[63:32] := a[63:32] +dst[95:64] := b[31:0] +dst[127:96] := b[63:32] + + + SSE +
xmmintrin.h
+ Move +
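These move primitives are the usual building blocks for horizontal reductions. A sketch assuming the upper-to-lower move is `_mm_movehl_ps` (plus the SSE shuffle and scalar-extract intrinsics):

```
#include <xmmintrin.h>

/* Sum of all four lanes of v. */
static inline float hsum_ps(__m128 v) {
    __m128 hi    = _mm_movehl_ps(v, v);   /* lanes 2,3 -> 0,1 */
    __m128 sum2  = _mm_add_ps(v, hi);     /* two pairwise sums */
    __m128 lane1 = _mm_shuffle_ps(sum2, sum2, _MM_SHUFFLE(1, 1, 1, 1));
    return _mm_cvtss_f32(_mm_add_ss(sum2, lane1));
}
```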
+ + + + + + Return vector of type __m128d with undefined elements. + SSE2 +
emmintrin.h
+ General Support +
+ + + + Return vector of type __m128i with undefined elements. + SSE2 +
emmintrin.h
+ General Support +
+ + + + Provide a hint to the processor that the code sequence is a spin-wait loop. This can help improve the performance and power consumption of spin-wait loops. + + SSE2 +
emmintrin.h
+ General Support +
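A sketch of the canonical use of the spin-wait hint above (assuming it is `_mm_pause`), inside a C11 atomic busy-wait:

```
#include <emmintrin.h>
#include <stdatomic.h>

/* Spin until *flag becomes nonzero; the pause hint reduces power use
   and the pipeline-flush cost of leaving the loop. */
void spin_wait(atomic_int *flag) {
    while (atomic_load_explicit(flag, memory_order_acquire) == 0)
        _mm_pause();
}
```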
+ + + + Invalidate and flush the cache line that contains "p" from all levels of the cache hierarchy. + + SSE2 +
emmintrin.h
+ General Support +
+ + + + Perform a serializing operation on all load-from-memory instructions that were issued prior to this instruction. Guarantees that every load instruction that precedes, in program order, is globally visible before any load instruction which follows the fence in program order. + + SSE2 +
emmintrin.h
+ General Support +
+ + + + Perform a serializing operation on all load-from-memory and store-to-memory instructions that were issued prior to this instruction. Guarantees that every memory access that precedes, in program order, the memory fence instruction is globally visible before any memory instruction which follows the fence in program order. + + SSE2 +
emmintrin.h
+ General Support +
+ + + + Load unaligned 64-bit integer from memory into the first element of "dst". + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[MAX:64] := 0 + + + SSE2 +
immintrin.h
+ Load +
+ + + + Load unaligned 16-bit integer from memory into the first element of "dst". + +dst[15:0] := MEM[mem_addr+15:mem_addr] +dst[MAX:16] := 0 + + SSE2 +
immintrin.h
+ Load +
+ + + + Load unaligned 32-bit integer from memory into the first element of "dst". + +dst[31:0] := MEM[mem_addr+31:mem_addr] +dst[MAX:32] := 0 + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 64-bit integer from memory into the first element of "dst". + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[MAX:64] := 0 + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 128-bits of integer data from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 128-bits of integer data from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load a double-precision (64-bit) floating-point element from memory into both elements of "dst". + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[127:64] := MEM[mem_addr+63:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load a double-precision (64-bit) floating-point element from memory into both elements of "dst". + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[127:64] := MEM[mem_addr+63:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 2 double-precision (64-bit) floating-point elements from memory into "dst" in reverse order. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. +
+dst[63:0] := MEM[mem_addr+127:mem_addr+64]
+dst[127:64] := MEM[mem_addr+63:mem_addr]
+ + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into "dst". + "mem_addr" does not need to be aligned on any particular boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst", and zero the upper element. "mem_addr" does not need to be aligned on any particular boundary. +
+dst[63:0] := MEM[mem_addr+63:mem_addr]
+dst[127:64] := 0
+ + + SSE2 +
emmintrin.h
+ Load +
+ + + + + Load a double-precision (64-bit) floating-point element from memory into the upper element of "dst", and copy the lower element from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary. + +dst[63:0] := a[63:0] +dst[127:64] := MEM[mem_addr+63:mem_addr] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + + Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst", and copy the upper element from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary. + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Load +
+ + + + + Store 16-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+15:mem_addr] := a[15:0] + + SSE2 +
immintrin.h
+ Store +
+ + + + + Store 64-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE2 +
immintrin.h
+ Store +
+ + + + + Store 32-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+31:mem_addr] := a[31:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + + Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint. "mem_addr" does not need to be aligned on any particular boundary. + +FOR j := 0 to 15 + i := j*8 + IF mask[i+7] + MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits of integer data from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits of integer data from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 64-bit integer from the first element of "a" into memory. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits of integer data from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 32-bit integer "a" into memory using a non-temporal hint to minimize cache pollution. If the cache line containing address "mem_addr" is already in the cache, the cache will be updated. + +MEM[mem_addr+31:mem_addr] := a[31:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 64-bit integer "a" into memory using a non-temporal hint to minimize cache pollution. If the cache line containing address "mem_addr" is already in the cache, the cache will be updated. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
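A sketch combining the non-temporal integer store above (assuming `_mm_stream_si128`) with the SSE store fence, the usual pattern for filling large buffers without polluting the cache:

```
#include <emmintrin.h>
#include <stddef.h>

/* dst must be 16-byte aligned. The fence orders the streaming stores
   before any later stores that publish the buffer. */
void fill_nt(__m128i *dst, __m128i v, size_t n) {
    for (size_t i = 0; i < n; i++)
        _mm_stream_si128(&dst[i], v);
    _mm_sfence();   /* declared in xmmintrin.h, pulled in by emmintrin.h */
}
```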
+ + + + + Store the lower double-precision (64-bit) floating-point element from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store the lower double-precision (64-bit) floating-point element from "a" into 2 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+63:mem_addr] := a[63:0] +MEM[mem_addr+127:mem_addr+64] := a[63:0] + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store the lower double-precision (64-bit) floating-point element from "a" into 2 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+63:mem_addr] := a[63:0] +MEM[mem_addr+127:mem_addr+64] := a[63:0] + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory. + "mem_addr" does not need to be aligned on any particular boundary. + +MEM[mem_addr+127:mem_addr] := a[127:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store 2 double-precision (64-bit) floating-point elements from "a" into memory in reverse order. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +MEM[mem_addr+63:mem_addr] := a[127:64] +MEM[mem_addr+127:mem_addr+64] := a[63:0] + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store the upper double-precision (64-bit) floating-point element from "a" into memory. + +MEM[mem_addr+63:mem_addr] := a[127:64] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Store the lower double-precision (64-bit) floating-point element from "a" into memory. + +MEM[mem_addr+63:mem_addr] := a[63:0] + + + SSE2 +
emmintrin.h
+ Store +
+ + + + + Add packed 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := a[i+7:i] + b[i+7:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := a[i+15:i] + b[i+15:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed 32-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] + b[i+31:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add 64-bit integers "a" and "b", and store the result in "dst". + +dst[63:0] := a[63:0] + b[63:0] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed 64-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
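Saturating byte math is the workhorse of 8-bit image kernels. A sketch assuming the unsigned byte form above is `_mm_adds_epu8`:

```
#include <emmintrin.h>

/* Brighten 16 pixels at once: 250 + 10 clamps to 255 instead of
   wrapping to 4 as the plain add would. */
__m128i brighten(__m128i pixels) {
    return _mm_adds_epu8(pixels, _mm_set1_epi8(10));
}
```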
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
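The multiply-and-horizontally-add pattern above is the classic inner step of fixed-point dot products. A sketch assuming it is `_mm_madd_epi16`:

```
#include <emmintrin.h>

/* Each 32-bit lane of the result accumulates a[2k]*b[2k] + a[2k+1]*b[2k+1];
   summing the four lanes afterwards completes an 8-element dot product. */
static inline __m128i dot_step(__m128i acc, __m128i a, __m128i b) {
    return _mm_add_epi32(acc, _mm_madd_epi16(a, b));
}
```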
+ + + + + Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[31:16] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := a[i+15:i] * b[i+15:i] + dst[i+15:i] := tmp[31:16] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) + dst[i+15:i] := tmp[15:0] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the low unsigned 32-bit integers from "a" and "b", and store the unsigned 64-bit result in "dst". + +dst[63:0] := a[31:0] * b[31:0] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+31:i] * b[i+31:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + + Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce two unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst". +
+FOR j := 0 to 15
+	i := j*8
+	tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i])
+ENDFOR
+FOR j := 0 to 1
+	i := j*64
+	dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56]
+	dst[i+63:i+16] := 0
+ENDFOR
+ + SSE2 +
emmintrin.h
+ Arithmetic + Miscellaneous +
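The absolute-difference sum above is the core of block-matching cost functions. A sketch assuming it is `_mm_sad_epu8`, reducing the two 16-bit partial sums to one scalar:

```
#include <emmintrin.h>

/* Sum of absolute differences of two 16-byte blocks. */
static inline unsigned sad16(__m128i a, __m128i b) {
    __m128i s = _mm_sad_epu8(a, b);   /* partials in bits 15:0 and 79:64 */
    return (unsigned)_mm_cvtsi128_si32(s)
         + (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(s, 8));
}
```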
+ + + + + Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := a[i+7:i] - b[i+7:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := a[i+15:i] - b[i+15:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[i+31:i] - b[i+31:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract 64-bit integer "b" from 64-bit integer "a", and store the result in "dst". + +dst[63:0] := a[63:0] - b[63:0] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := a[63:0] + b[63:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] + b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := a[63:0] / b[63:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + dst[i+63:i] := a[i+63:i] / b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := a[63:0] * b[63:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] * b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := a[63:0] - b[63:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] - b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Arithmetic +
+ + + + + Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +ENDFOR + + + SSE2 +
emmintrin.h
+ Probability/Statistics +
+ + + + + Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +ENDFOR + + + SSE2 +
emmintrin.h
+ Probability/Statistics +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
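Packed min and max compose directly into a branch-free clamp. A sketch assuming the signed 16-bit forms are `_mm_min_epi16` and `_mm_max_epi16`:

```
#include <emmintrin.h>

/* Clamp every signed 16-bit lane of v into [lo, hi]. */
static inline __m128i clamp_epi16(__m128i v, short lo, short hi) {
    v = _mm_max_epi16(v, _mm_set1_epi16(lo));
    return _mm_min_epi16(v, _mm_set1_epi16(hi));
}
```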
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [max_float_note] + +dst[63:0] := MAX(a[63:0], b[63:0]) +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [min_float_note] + +dst[63:0] := MIN(a[63:0], b[63:0]) +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Special Math Functions +
+ + + + + Shift "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] << (tmp*8) + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] << (tmp*8) + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] >> (tmp*8) + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) + ELSE + dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) + ELSE + dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". + +tmp := imm8[7:0] +IF tmp > 15 + tmp := 16 +FI +dst[127:0] := a[127:0] >> (tmp*8) + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF imm8[7:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF count[63:0] > 15 + dst[i+15:i] := 0 + ELSE + dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF imm8[7:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF count[63:0] > 31 + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF imm8[7:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
+ + + + + Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF count[63:0] > 63 + dst[i+63:i] := 0 + ELSE + dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) + FI +ENDFOR + + + SSE2 +
emmintrin.h
+ Shift +
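Each shift comes in an "imm8" form (compile-time constant) and a "count" form (amount read from the low 64 bits of a vector, so it may be a runtime value). A sketch assuming the 32-bit left shifts are `_mm_slli_epi32` and `_mm_sll_epi32`:

```
#include <emmintrin.h>

__m128i shl_by_4(__m128i v) { return _mm_slli_epi32(v, 4); }

__m128i shl_by_n(__m128i v, unsigned n) {
    /* Move the runtime count into a vector's low lane first. */
    return _mm_sll_epi32(v, _mm_cvtsi32_si128((int)n));
}
```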
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[127:0] := (a[127:0] AND b[127:0]) + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of 128 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". + +dst[127:0] := ((NOT a[127:0]) AND b[127:0]) + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of 128 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[127:0] := (a[127:0] OR b[127:0]) + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of 128 bits (representing integer data) in "a" and "b", and store the result in "dst". + +dst[127:0] := (a[127:0] XOR b[127:0]) + + + SSE2 +
emmintrin.h
+ Logical +
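As with the floating-point variants, these ops implement branch-free bit selection. A sketch assuming the AND/AND-NOT/OR entries are `_mm_and_si128`, `_mm_andnot_si128`, and `_mm_or_si128`:

```
#include <emmintrin.h>

/* Take bits of a where mask is 1 and bits of b where mask is 0. */
static inline __m128i select_si128(__m128i mask, __m128i a, __m128i b) {
    return _mm_or_si128(_mm_and_si128(mask, a),
                        _mm_andnot_si128(mask, b));
}
```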
+ + + + + Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] OR b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +ENDFOR + + + SSE2 +
emmintrin.h
+ Logical +
+ + + + + Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtb instruction with the order of the operands switched. + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := ( a[i+7:i] < b[i+7:i] ) ? 0xFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtw instruction with the order of the operands switched. + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ( a[i+15:i] < b[i+15:i] ) ? 0xFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtd instruction with the order of the operands switched. + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ( a[i+31:i] < b[i+31:i] ) ? 0xFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
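The byte-equality compare is the heart of vectorized string scanning. A sketch assuming it is `_mm_cmpeq_epi8`, reduced with the SSE2 byte movemask:

```
#include <emmintrin.h>

/* memchr-style probe: does any of these 16 bytes equal needle? */
static inline int has_byte(__m128i block, char needle) {
    __m128i eq = _mm_cmpeq_epi8(block, _mm_set1_epi8(needle));
    return _mm_movemask_epi8(eq) != 0;
}
```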
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for equality, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] == b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for less-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] < b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] <= b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for greater-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] > b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for greater-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] >= b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + dst[63:0] := (a[63:0] != NaN AND b[63:0] != NaN) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + dst[63:0] := (a[63:0] == NaN OR b[63:0] == NaN) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (a[63:0] != b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (!(a[63:0] < b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (!(a[63:0] <= b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (!(a[63:0] > b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := (!(a[63:0] >= b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] == b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] < b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] <= b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] > b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] >= b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in "dst". + FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (a[i+63:i] != b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (!(a[i+63:i] < b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (!(a[i+63:i] <= b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (!(a[i+63:i] > b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := (!(a[i+63:i] >= b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] == b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] < b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] <= b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] > b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] >= b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). + RETURN ( a[63:0] == NaN OR b[63:0] == NaN OR a[63:0] != b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] == b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] < b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] <= b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] > b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] >= b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
+ + + + + Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. + RETURN ( a[63:0] == NaN OR b[63:0] == NaN OR a[63:0] != b[63:0] ) ? 1 : 0 + + + SSE2 +
emmintrin.h
+ Compare +
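Unlike the packed forms, these scalar compares return a plain 0/1 int, and the quiet variants differ only in not signalling an exception on QNaN operands. A short sketch, assuming the names _mm_comigt_sd, _mm_comile_sd, and _mm_ucomigt_sd:

    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128d a = _mm_set_sd(3.0);
        __m128d b = _mm_set_sd(2.0);
        /* scalar compares return a plain int, unlike the packed mask forms */
        printf("3 > 2:  %d\n", _mm_comigt_sd(a, b));   /* 1 */
        printf("3 <= 2: %d\n", _mm_comile_sd(a, b));   /* 0 */
        /* the quiet form gives the same result, but won't signal on QNaN */
        printf("quiet:  %d\n", _mm_ucomigt_sd(a, b));  /* 1 */
        return 0;
    }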
+ + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + m := j*64 + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Convert the signed 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int32_To_FP64(b[31:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_Int64_To_FP64(b[63:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := j*32 + m := j*64 + dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy 32-bit integer "a" to the lower elements of "dst", and zero the upper elements of "dst". + +dst[31:0] := a[31:0] +dst[127:32] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy 64-bit integer "a" to the lower element of "dst", and zero the upper element. + +dst[63:0] := a[63:0] +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy 64-bit integer "a" to the lower element of "dst", and zero the upper element. + +dst[63:0] := a[63:0] +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy the lower 32-bit integer in "a" to "dst". + +dst[31:0] := a[31:0] + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy the lower 64-bit integer in "a" to "dst". + +dst[63:0] := a[63:0] + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy the lower 64-bit integer in "a" to "dst". + +dst[63:0] := a[63:0] + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) +ENDFOR +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_Int32(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := Convert_FP64_To_FP32(b[63:0]) +dst[127:32] := a[127:32] +dst[MAX:128] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Copy the lower double-precision (64-bit) floating-point element of "a" to "dst". + +dst[63:0] := a[63:0] + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +dst[127:64] := a[127:64] +dst[MAX:128] := 0 + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
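The paired conversions are easy to mix up: the plain form rounds according to MXCSR (round-to-nearest-even by default), while the "with truncation" form always rounds toward zero, matching a C cast. A sketch assuming the two intrinsics above are _mm_cvtpd_epi32 and _mm_cvttpd_epi32:

    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128d v = _mm_set_pd(-1.5, 2.5);    /* lane 0 = 2.5, lane 1 = -1.5 */
        __m128i r = _mm_cvtpd_epi32(v);       /* rounds per MXCSR: nearest-even */
        __m128i t = _mm_cvttpd_epi32(v);      /* always truncates toward zero */
        int r_out[4], t_out[4];
        _mm_storeu_si128((__m128i *)r_out, r);
        _mm_storeu_si128((__m128i *)t_out, t);
        printf("round: %d %d\n", r_out[0], r_out[1]);   /* 2 -2 */
        printf("trunc: %d %d\n", t_out[0], t_out[1]);   /* 2 -1 */
        return 0;
    }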
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". + +dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". + +dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". + +FOR j := 0 to 1 + i := 32*j + k := 64*j + dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Convert +
+ + + + + Set packed 64-bit integers in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + Set packed 64-bit integers in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + Set packed 32-bit integers in "dst" with the supplied values. + +dst[31:0] := e0 +dst[63:32] := e1 +dst[95:64] := e2 +dst[127:96] := e3 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + + + + + Set packed 16-bit integers in "dst" with the supplied values. + +dst[15:0] := e0 +dst[31:16] := e1 +dst[47:32] := e2 +dst[63:48] := e3 +dst[79:64] := e4 +dst[95:80] := e5 +dst[111:96] := e6 +dst[127:112] := e7 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values. + +dst[7:0] := e0 +dst[15:8] := e1 +dst[23:16] := e2 +dst[31:24] := e3 +dst[39:32] := e4 +dst[47:40] := e5 +dst[55:48] := e6 +dst[63:56] := e7 +dst[71:64] := e8 +dst[79:72] := e9 +dst[87:80] := e10 +dst[95:88] := e11 +dst[103:96] := e12 +dst[111:104] := e13 +dst[119:112] := e14 +dst[127:120] := e15 + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast 64-bit integer "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast 64-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastq". + +FOR j := 0 to 1
+ i := j*64
+ dst[i+63:i] := a[63:0]
+ENDFOR
+ + SSE2 + &#13;
emmintrin.h
+ Set +
+ + + + Broadcast 32-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastd". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := a[31:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast 16-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastw". + +FOR j := 0 to 7
+ i := j*16
+ dst[i+15:i] := a[15:0]
+ENDFOR
+ + SSE2 + &#13;
emmintrin.h
+ Set +
+ + + + Broadcast 8-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastb". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := a[7:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + + Set packed 64-bit integers in "dst" with the supplied values in reverse order. + +dst[63:0] := e1 +dst[127:64] := e0 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + Set packed 32-bit integers in "dst" with the supplied values in reverse order. + +dst[31:0] := e3 +dst[63:32] := e2 +dst[95:64] := e1 +dst[127:96] := e0 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + + + + + Set packed 16-bit integers in "dst" with the supplied values in reverse order. + +dst[15:0] := e7 +dst[31:16] := e6 +dst[47:32] := e5 +dst[63:48] := e4 +dst[79:64] := e3 +dst[95:80] := e2 +dst[111:96] := e1 +dst[127:112] := e0 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + + + + + + + + + + + + + + + Set packed 8-bit integers in "dst" with the supplied values in reverse order. + +dst[7:0] := e15 +dst[15:8] := e14 +dst[23:16] := e13 +dst[31:24] := e12 +dst[39:32] := e11 +dst[47:40] := e10 +dst[55:48] := e9 +dst[63:56] := e8 +dst[71:64] := e7 +dst[79:72] := e6 +dst[87:80] := e5 +dst[95:88] := e4 +dst[103:96] := e3 +dst[111:104] := e2 +dst[119:112] := e1 +dst[127:120] := e0 + + SSE2 +
emmintrin.h
+ Set +
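The set/setr pairs differ only in argument order: the set_* forms list elements from the highest lane down, while the setr_* ("reverse") forms list them from lane 0 up, so setr matches the order the values appear in memory. For example, assuming the 32-bit forms are _mm_set_epi32 and _mm_setr_epi32:

    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        /* set lists arguments high-to-low; setr lists them low-to-high */
        __m128i a = _mm_set_epi32(3, 2, 1, 0);
        __m128i b = _mm_setr_epi32(0, 1, 2, 3);
        int out_a[4], out_b[4];
        _mm_storeu_si128((__m128i *)out_a, a);
        _mm_storeu_si128((__m128i *)out_b, b);
        /* both print 0 1 2 3: lane 0 is the lowest-addressed element */
        printf("%d %d %d %d\n", out_a[0], out_a[1], out_a[2], out_a[3]);
        printf("%d %d %d %d\n", out_b[0], out_b[1], out_b[2], out_b[3]);
        return 0;
    }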
+ + + Return vector of type __m128i with all elements set to zero. + +dst[MAX:0] := 0 + + + SSE2 +
emmintrin.h
+ Set +
+ + + + Copy double-precision (64-bit) floating-point element "a" to the lower element of "dst", and zero the upper element. + +dst[63:0] := a[63:0] +dst[127:64] := 0 + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := a[63:0] +ENDFOR + + SSE2 +
emmintrin.h
+ Set +
+ + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values. + +dst[63:0] := e0 +dst[127:64] := e1 + + SSE2 +
emmintrin.h
+ Set +
+ + + + + Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. + +dst[63:0] := e1 +dst[127:64] := e0 + + SSE2 +
emmintrin.h
+ Set +
+ + + + Return vector of type __m128d with all elements set to zero. + +dst[MAX:0] := 0 + + + SSE2 +
emmintrin.h
+ Set +
+ + + + Copy the lower 64-bit integer in "a" to "dst". + +dst[63:0] := a[63:0] + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". + +dst[7:0] := Saturate8(a[15:0]) +dst[15:8] := Saturate8(a[31:16]) +dst[23:16] := Saturate8(a[47:32]) +dst[31:24] := Saturate8(a[63:48]) +dst[39:32] := Saturate8(a[79:64]) +dst[47:40] := Saturate8(a[95:80]) +dst[55:48] := Saturate8(a[111:96]) +dst[63:56] := Saturate8(a[127:112]) +dst[71:64] := Saturate8(b[15:0]) +dst[79:72] := Saturate8(b[31:16]) +dst[87:80] := Saturate8(b[47:32]) +dst[95:88] := Saturate8(b[63:48]) +dst[103:96] := Saturate8(b[79:64]) +dst[111:104] := Saturate8(b[95:80]) +dst[119:112] := Saturate8(b[111:96]) +dst[127:120] := Saturate8(b[127:112]) + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". + +dst[15:0] := Saturate16(a[31:0]) +dst[31:16] := Saturate16(a[63:32]) +dst[47:32] := Saturate16(a[95:64]) +dst[63:48] := Saturate16(a[127:96]) +dst[79:64] := Saturate16(b[31:0]) +dst[95:80] := Saturate16(b[63:32]) +dst[111:96] := Saturate16(b[95:64]) +dst[127:112] := Saturate16(b[127:96]) + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + + Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". + +dst[7:0] := SaturateU8(a[15:0]) +dst[15:8] := SaturateU8(a[31:16]) +dst[23:16] := SaturateU8(a[47:32]) +dst[31:24] := SaturateU8(a[63:48]) +dst[39:32] := SaturateU8(a[79:64]) +dst[47:40] := SaturateU8(a[95:80]) +dst[55:48] := SaturateU8(a[111:96]) +dst[63:56] := SaturateU8(a[127:112]) +dst[71:64] := SaturateU8(b[15:0]) +dst[79:72] := SaturateU8(b[31:16]) +dst[87:80] := SaturateU8(b[47:32]) +dst[95:88] := SaturateU8(b[63:48]) +dst[103:96] := SaturateU8(b[79:64]) +dst[111:104] := SaturateU8(b[95:80]) +dst[119:112] := SaturateU8(b[111:96]) +dst[127:120] := SaturateU8(b[127:112]) + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[j] := a[i+7] +ENDFOR +dst[MAX:16] := 0 + + + SSE2 +
emmintrin.h
+ Miscellaneous +
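Extracting the per-byte sign bits into a scalar mask is the usual bridge from SIMD compares back to scalar control flow, for example in memchr-style scanning. A sketch assuming the intrinsic above is _mm_movemask_epi8, combined with _mm_cmpeq_epi8 and GCC/Clang's __builtin_ctz for the bit scan:

    #include <emmintrin.h>
    #include <stdio.h>

    /* search one 16-byte block for a byte; returns its index or -1 */
    static int find_byte16(const unsigned char *p, unsigned char needle) {
        __m128i block = _mm_loadu_si128((const __m128i *)p);
        __m128i eq = _mm_cmpeq_epi8(block, _mm_set1_epi8((char)needle));
        int mask = _mm_movemask_epi8(eq);   /* bit j set <=> byte j matched */
        return mask ? __builtin_ctz(mask) : -1;
    }

    int main(void) {
        unsigned char buf[17] = "hello, intrinsic";
        printf("%d\n", find_byte16(buf, ','));   /* 5 */
        return 0;
    }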
+ + + + Set each bit of mask "dst" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in "a". + +FOR j := 0 to 1 + i := j*64 + IF a[i+63] + dst[j] := 1 + ELSE + dst[j] := 0 + FI +ENDFOR +dst[MAX:2] := 0 + + + SSE2 +
emmintrin.h
+ Miscellaneous +
+ + + + Copy the 64-bit integer "a" to the lower element of "dst", and zero the upper element. + +dst[63:0] := a[63:0] +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Move +
+ + + + Copy the lower 64-bit integer in "a" to the lower element of "dst", and zero the upper element. + +dst[63:0] := a[63:0] +dst[127:64] := 0 + + + SSE2 +
emmintrin.h
+ Move +
+ + + + + Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := b[63:0] +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Move +
+ + + + + Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". + +dst[15:0] := (a[127:0] >> (imm8[2:0] * 16))[15:0] +dst[31:16] := 0 + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8". + +dst[127:0] := a[127:0] +sel := imm8[2:0]*16 +dst[sel+15:sel] := i[15:0] + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst". + +DEFINE SELECT4(src, control) { + CASE(control[1:0]) OF + 0: tmp[31:0] := src[31:0] + 1: tmp[31:0] := src[63:32] + 2: tmp[31:0] := src[95:64] + 3: tmp[31:0] := src[127:96] + ESAC + RETURN tmp[31:0] +} +dst[31:0] := SELECT4(a[127:0], imm8[1:0]) +dst[63:32] := SELECT4(a[127:0], imm8[3:2]) +dst[95:64] := SELECT4(a[127:0], imm8[5:4]) +dst[127:96] := SELECT4(a[127:0], imm8[7:6]) + + + SSE2 +
emmintrin.h
+ Swizzle +
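The imm8 here packs four 2-bit lane selectors; the _MM_SHUFFLE(z, y, x, w) helper macro builds it with the highest destination lane listed first. A sketch assuming the intrinsic is _mm_shuffle_epi32:

    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i v = _mm_setr_epi32(10, 11, 12, 13);
        __m128i rev = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3)); /* reverse */
        __m128i bc  = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 0, 0)); /* broadcast lane 0 */
        int r[4], b[4];
        _mm_storeu_si128((__m128i *)r, rev);
        _mm_storeu_si128((__m128i *)b, bc);
        printf("%d %d %d %d\n", r[0], r[1], r[2], r[3]);  /* 13 12 11 10 */
        printf("%d %d %d %d\n", b[0], b[1], b[2], b[3]);  /* 10 10 10 10 */
        return 0;
    }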
+ + + + + Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from "a" to "dst". + +dst[63:0] := a[63:0]
+dst[79:64] := (a >> (imm8[1:0] * 16))[79:64]
+dst[95:80] := (a >> (imm8[3:2] * 16))[79:64]
+dst[111:96] := (a >> (imm8[5:4] * 16))[79:64]
+dst[127:112] := (a >> (imm8[7:6] * 16))[79:64]
+ + SSE2 + &#13;
emmintrin.h
+ Swizzle +
+ + + + + Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from "a" to "dst". + +dst[15:0] := (a >> (imm8[1:0] * 16))[15:0]
+dst[31:16] := (a >> (imm8[3:2] * 16))[15:0]
+dst[47:32] := (a >> (imm8[5:4] * 16))[15:0]
+dst[63:48] := (a >> (imm8[7:6] * 16))[15:0]
+dst[127:64] := a[127:64]
+ + SSE2 + &#13;
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[71:64] + dst[15:8] := src2[71:64] + dst[23:16] := src1[79:72] + dst[31:24] := src2[79:72] + dst[39:32] := src1[87:80] + dst[47:40] := src2[87:80] + dst[55:48] := src1[95:88] + dst[63:56] := src2[95:88] + dst[71:64] := src1[103:96] + dst[79:72] := src2[103:96] + dst[87:80] := src1[111:104] + dst[95:88] := src2[111:104] + dst[103:96] := src1[119:112] + dst[111:104] := src2[119:112] + dst[119:112] := src1[127:120] + dst[127:120] := src2[127:120] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[79:64] + dst[31:16] := src2[79:64] + dst[47:32] := src1[95:80] + dst[63:48] := src2[95:80] + dst[79:64] := src1[111:96] + dst[95:80] := src2[111:96] + dst[111:96] := src1[127:112] + dst[127:112] := src2[127:112] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[95:64] + dst[63:32] := src2[95:64] + dst[95:64] := src1[127:96] + dst[127:96] := src2[127:96] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { + dst[7:0] := src1[7:0] + dst[15:8] := src2[7:0] + dst[23:16] := src1[15:8] + dst[31:24] := src2[15:8] + dst[39:32] := src1[23:16] + dst[47:40] := src2[23:16] + dst[55:48] := src1[31:24] + dst[63:56] := src2[31:24] + dst[71:64] := src1[39:32] + dst[79:72] := src2[39:32] + dst[87:80] := src1[47:40] + dst[95:88] := src2[47:40] + dst[103:96] := src1[55:48] + dst[111:104] := src2[55:48] + dst[119:112] := src1[63:56] + dst[127:120] := src2[63:56] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { + dst[15:0] := src1[15:0] + dst[31:16] := src2[15:0] + dst[47:32] := src1[31:16] + dst[63:48] := src2[31:16] + dst[79:64] := src1[47:32] + dst[95:80] := src2[47:32] + dst[111:96] := src1[63:48] + dst[127:112] := src2[63:48] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { + dst[31:0] := src1[31:0] + dst[63:32] := src2[31:0] + dst[95:64] := src1[63:32] + dst[127:96] := src2[63:32] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave 64-bit integers from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
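Interleaving with a zero vector is the classic SSE2 idiom for zero-extending packed integers to twice the width (the dedicated widening conversions arrived only with SSE4.1, further below). A sketch assuming the byte forms above are _mm_unpacklo_epi8 and _mm_unpackhi_epi8:

    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i bytes = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8,
                                      9, 10, 11, 12, 13, 14, 15, 16);
        __m128i zero = _mm_setzero_si128();
        /* interleave with zero: the low 8 bytes become eight 16-bit lanes */
        __m128i lo16 = _mm_unpacklo_epi8(bytes, zero);
        __m128i hi16 = _mm_unpackhi_epi8(bytes, zero);
        short out[8];
        _mm_storeu_si128((__m128i *)out, lo16);
        printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* 1 2 3 4 */
        (void)hi16;
        return 0;
    }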
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[127:64] + dst[127:64] := src2[127:64] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst". + +DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { + dst[63:0] := src1[63:0] + dst[127:64] := src2[63:0] + RETURN dst[127:0] +} +dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + + Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst". + +dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] + + + SSE2 +
emmintrin.h
+ Swizzle +
+ + + + + Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := SQRT(b[63:0]) +dst[127:64] := a[127:64] + + + SSE2 +
emmintrin.h
+ Elementary Math Functions +
+ + + + Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SQRT(a[i+63:i]) +ENDFOR + + + SSE2 +
emmintrin.h
+ Elementary Math Functions +
+ + + + Cast vector of type __m128d to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + Cast vector of type __m128d to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + Cast vector of type __m128 to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + Cast vector of type __m128i to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. + SSE2 +
emmintrin.h
+ Cast +
+ + + + + + + Alternatively add and subtract packed single-precision (32-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF ((j & 1) == 0) + dst[i+31:i] := a[i+31:i] - b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + b[i+31:i] + FI +ENDFOR + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + + Alternatively add and subtract packed double-precision (64-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF ((j & 1) == 0) + dst[i+63:i] := a[i+63:i] - b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + b[i+63:i] + FI +ENDFOR + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[63:0] := a[127:64] + a[63:0] +dst[127:64] := b[127:64] + b[63:0] + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[31:0] := a[63:32] + a[31:0] +dst[63:32] := a[127:96] + a[95:64] +dst[95:64] := b[63:32] + b[31:0] +dst[127:96] := b[127:96] + b[95:64] + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[63:0] := a[63:0] - a[127:64] +dst[127:64] := b[63:0] - b[127:64] + + + SSE3 +
pmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". + +dst[31:0] := a[31:0] - a[63:32]
+dst[63:32] := a[95:64] - a[127:96]
+dst[95:64] := b[31:0] - b[63:32]
+dst[127:96] := b[95:64] - b[127:96]
+ + SSE3 + &#13;
pmmintrin.h
+ Arithmetic +
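Two rounds of horizontal add reduce a four-lane vector to a single sum broadcast across all lanes. A sketch assuming the single-precision form is _mm_hadd_ps (compiled with SSE3 enabled, e.g. -msse3):

    #include <pmmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 v = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
        __m128 s = _mm_hadd_ps(v, v);   /* (1+2, 3+4, 1+2, 3+4) */
        s = _mm_hadd_ps(s, s);          /* (10, 10, 10, 10) */
        printf("%f\n", _mm_cvtss_f32(s));   /* 10.000000 */
        return 0;
    }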
+ + + + Load 128-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm_loadu_si128" when the data crosses a cache line boundary. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE3 +
pmmintrin.h
+ Load +
+ + + + Load a double-precision (64-bit) floating-point element from memory into both elements of "dst". + +dst[63:0] := MEM[mem_addr+63:mem_addr] +dst[127:64] := MEM[mem_addr+63:mem_addr] + + + SSE3 +
pmmintrin.h
+ Load +
+ + + + Duplicate the low double-precision (64-bit) floating-point element from "a", and store the results in "dst". + +dst[63:0] := a[63:0] +dst[127:64] := a[63:0] + + + SSE3 +
pmmintrin.h
+ Move +
+ + + + Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[63:32] +dst[63:32] := a[63:32] +dst[95:64] := a[127:96] +dst[127:96] := a[127:96] + + + SSE3 +
pmmintrin.h
+ Move +
+ + + + Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". + +dst[31:0] := a[31:0] +dst[63:32] := a[31:0] +dst[95:64] := a[95:64] +dst[127:96] := a[95:64] + + + SSE3 +
pmmintrin.h
+ Move +
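The duplicate-even/duplicate-odd moves exist mainly to feed the alternating add/subtract above in complex arithmetic: splitting interleaved (re, im) data into broadcast real and imaginary parts gives the textbook SSE3 complex multiply. A sketch assuming the names _mm_moveldup_ps, _mm_movehdup_ps, and _mm_addsub_ps:

    #include <pmmintrin.h>
    #include <stdio.h>

    /* multiply two pairs of interleaved complex floats (re0, im0, re1, im1) */
    static __m128 cmul(__m128 a, __m128 b) {
        __m128 re = _mm_moveldup_ps(a);   /* (ar, ar, ...) */
        __m128 im = _mm_movehdup_ps(a);   /* (ai, ai, ...) */
        __m128 t  = _mm_mul_ps(im, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 3, 0, 1)));
        /* lanes: (ar*br - ai*bi, ar*bi + ai*br, ...) */
        return _mm_addsub_ps(_mm_mul_ps(re, b), t);
    }

    int main(void) {
        __m128 a = _mm_setr_ps(1.0f, 2.0f, 0.0f, 1.0f);  /* 1+2i, i */
        __m128 b = _mm_setr_ps(3.0f, 4.0f, 0.0f, 1.0f);  /* 3+4i, i */
        float out[4];
        _mm_storeu_ps(out, cmul(a, b));
        printf("%g%+gi  %g%+gi\n", out[0], out[1], out[2], out[3]); /* -5+10i  -1+0i */
        return 0;
    }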
+ + + + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF imm8[j] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF imm8[j] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + IF mask[i+63] + dst[i+63:i] := b[i+63:i] + ELSE + dst[i+63:i] := a[i+63:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 3 + i := j*32 + IF mask[i+31] + dst[i+31:i] := b[i+31:i] + ELSE + dst[i+31:i] := a[i+31:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Blend packed 8-bit integers from "a" and "b" using "mask", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + IF mask[i+7] + dst[i+7:i] := b[i+7:i] + ELSE + dst[i+7:i] := a[i+7:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Blend packed 16-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst". + +FOR j := 0 to 7 + i := j*16 + IF imm8[j] + dst[i+15:i] := b[i+15:i] + ELSE + dst[i+15:i] := a[i+15:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
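The variable blends select per element on the most significant bit of the mask, so a compare result can drive the selection directly. A sketch assuming the byte form is _mm_blendv_epi8, with an SSE2 _mm_cmplt_epi32 compare building the mask (compiled with SSE4.1 enabled, e.g. -msse4.1):

    #include <smmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i a = _mm_setr_epi32(-1, 7, -3, 9);
        __m128i zero = _mm_setzero_si128();
        /* clamp negatives to zero: the compare builds the mask, blendv selects */
        __m128i neg = _mm_cmplt_epi32(a, zero);
        __m128i r = _mm_blendv_epi8(a, zero, neg);  /* take zero where mask set */
        int out[4];
        _mm_storeu_si128((__m128i *)out, r);
        printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  /* 0 7 0 9 */
        return 0;
    }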
+ + + + + Extract a single-precision (32-bit) floating-point element from "a", selected with "imm8", and store the result in "dst". + +dst[31:0] := (a[127:0] >> (imm8[1:0] * 32))[31:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + Extract an 8-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". + +dst[7:0] := (a[127:0] >> (imm8[3:0] * 8))[7:0] +dst[31:8] := 0 + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + Extract a 32-bit integer from "a", selected with "imm8", and store the result in "dst". + +dst[31:0] := (a[127:0] >> (imm8[1:0] * 32))[31:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + Extract a 64-bit integer from "a", selected with "imm8", and store the result in "dst". + +dst[63:0] := (a[127:0] >> (imm8[0] * 64))[63:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "tmp", then insert a single-precision (32-bit) floating-point element from "b" into "tmp" using the control in "imm8". Store "tmp" to "dst" using the mask in "imm8" (elements are zeroed out when the corresponding bit is set). + +tmp2[127:0] := a[127:0] +CASE (imm8[7:6]) OF +0: tmp1[31:0] := b[31:0] +1: tmp1[31:0] := b[63:32] +2: tmp1[31:0] := b[95:64] +3: tmp1[31:0] := b[127:96] +ESAC +CASE (imm8[5:4]) OF +0: tmp2[31:0] := tmp1[31:0] +1: tmp2[63:32] := tmp1[31:0] +2: tmp2[95:64] := tmp1[31:0] +3: tmp2[127:96] := tmp1[31:0] +ESAC +FOR j := 0 to 3 + i := j*32 + IF imm8[j%8] + dst[i+31:i] := 0 + ELSE + dst[i+31:i] := tmp2[i+31:i] + FI +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the lower 8-bit integer from "i" into "dst" at the location specified by "imm8". + +dst[127:0] := a[127:0] +sel := imm8[3:0]*8 +dst[sel+7:sel] := i[7:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 32-bit integer "i" into "dst" at the location specified by "imm8". + +dst[127:0] := a[127:0] +sel := imm8[1:0]*32 +dst[sel+31:sel] := i[31:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Copy "a" to "dst", and insert the 64-bit integer "i" into "dst" at the location specified by "imm8". + +dst[127:0] := a[127:0] +sel := imm8[0]*64 +dst[sel+63:sel] := i[63:0] + + + SSE4.1 +
smmintrin.h
+ Swizzle +
+ + + + + + Conditionally multiply the packed double-precision (64-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8". + +DEFINE DP(a[127:0], b[127:0], imm8[7:0]) { + FOR j := 0 to 1 + i := j*64 + IF imm8[(4+j)%8] + temp[i+63:i] := a[i+63:i] * b[i+63:i] + ELSE + temp[i+63:i] := 0.0 + FI + ENDFOR + + sum[63:0] := temp[127:64] + temp[63:0] + + FOR j := 0 to 1 + i := j*64 + IF imm8[j%8] + tmpdst[i+63:i] := sum[63:0] + ELSE + tmpdst[i+63:i] := 0.0 + FI + ENDFOR + RETURN tmpdst[127:0] +} +dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0]) + + + SSE4.1 +
smmintrin.h
+ Arithmetic +
+ + + + + + Conditionally multiply the packed single-precision (32-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8". + +DEFINE DP(a[127:0], b[127:0], imm8[7:0]) { + FOR j := 0 to 3 + i := j*32 + IF imm8[(4+j)%8] + temp[i+31:i] := a[i+31:i] * b[i+31:i] + ELSE + temp[i+31:i] := 0 + FI + ENDFOR + + sum[31:0] := (temp[127:96] + temp[95:64]) + (temp[63:32] + temp[31:0]) + + FOR j := 0 to 3 + i := j*32 + IF imm8[j%8] + tmpdst[i+31:i] := sum[31:0] + ELSE + tmpdst[i+31:i] := 0 + FI + ENDFOR + RETURN tmpdst[127:0] +} +dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0]) + + + SSE4.1 +
smmintrin.h
+ Arithmetic +
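In the imm8, the high nibble chooses which products enter the sum and the low nibble chooses which destination lanes receive it, so 0xF1 computes a full 4-element dot product into lane 0. A sketch assuming the single-precision form is _mm_dp_ps:

    #include <smmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
        __m128 b = _mm_setr_ps(5.0f, 6.0f, 7.0f, 8.0f);
        /* 0xF1: include all four products, write the sum to lane 0 only */
        __m128 d = _mm_dp_ps(a, b, 0xF1);
        printf("%f\n", _mm_cvtss_f32(d));   /* 70.000000 */
        return 0;
    }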
+ + + + + Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Arithmetic +
+ + + + + Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst". + +FOR j := 0 to 3 + i := j*32 + tmp[63:0] := a[i+31:i] * b[i+31:i] + dst[i+31:i] := tmp[31:0] +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Arithmetic +
+ + Miscellaneous + + + + + Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". + Eight SADs are performed using one quadruplet from "b" and eight quadruplets from "a". One quadruplet is selected from "b" starting at the offset specified in "imm8". Eight quadruplets are formed from sequential 8-bit integers selected from "a" starting at the offset specified in "imm8". + +DEFINE MPSADBW(a[127:0], b[127:0], imm8[2:0]) {
+ a_offset := imm8[2]*32
+ b_offset := imm8[1:0]*32
+ FOR j := 0 to 7
+  i := j*8
+  k := a_offset+i
+  l := b_offset
+  tmp[i*2+15:i*2] := ABS(Signed(a[k+7:k] - b[l+7:l])) + ABS(Signed(a[k+15:k+8] - b[l+15:l+8])) + \
+         ABS(Signed(a[k+23:k+16] - b[l+23:l+16])) + ABS(Signed(a[k+31:k+24] - b[l+31:l+24]))
+ ENDFOR
+ RETURN tmp[127:0]
+}
+dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0])
+ + SSE4.1 + &#13;
smmintrin.h
+ Arithmetic +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the packed double-precision (64-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed double-precision floating-point elements in "dst". + [round_note] + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ROUND(a[i+63:i], rounding) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := FLOOR(a[i+63:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := CEIL(a[i+63:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the packed single-precision (32-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed single-precision floating-point elements in "dst". + [round_note] + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ROUND(a[i+31:i], rounding) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := FLOOR(a[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := CEIL(a[i+31:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
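The rounding parameter selects the mode explicitly and can suppress precision exceptions, which makes the default round-half-to-even behaviour easy to demonstrate. A sketch assuming _mm_round_ps and the _MM_FROUND_* constants from smmintrin.h:

    #include <smmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 v = _mm_setr_ps(1.5f, 2.5f, -1.5f, -2.5f);
        /* explicit rounding mode, precision exceptions suppressed */
        __m128 n = _mm_round_ps(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        float out[4];
        _mm_storeu_ps(out, n);
        /* round-to-nearest-even: 2 2 -2 -2 */
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
        return 0;
    }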
+ + + + + + Round the lower double-precision (64-bit) floating-point element in "b" using the "rounding" parameter, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + [round_note] + +dst[63:0] := ROUND(b[63:0], rounding) +dst[127:64] := a[127:64] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the lower double-precision (64-bit) floating-point element in "b" down to an integer value, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := FLOOR(b[63:0]) +dst[127:64] := a[127:64] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the lower double-precision (64-bit) floating-point element in "b" up to an integer value, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". + +dst[63:0] := CEIL(b[63:0]) +dst[127:64] := a[127:64] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + + Round the lower single-precision (32-bit) floating-point element in "b" using the "rounding" parameter, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + [round_note] + +dst[31:0] := ROUND(b[31:0], rounding) +dst[127:32] := a[127:32] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the lower single-precision (32-bit) floating-point element in "b" down to an integer value, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := FLOOR(b[31:0]) +dst[127:32] := a[127:32] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + + + + Round the lower single-precision (32-bit) floating-point element in "b" up to an integer value, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". + +dst[31:0] := CEIL(b[31:0]) +dst[127:32] := a[127:32] + + + SSE4.1 +
smmintrin.h
+ Special Math Functions +
+ + Miscellaneous + + + + Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst". + +dst[15:0] := SaturateU16(a[31:0]) +dst[31:16] := SaturateU16(a[63:32]) +dst[47:32] := SaturateU16(a[95:64]) +dst[63:48] := SaturateU16(a[127:96]) +dst[79:64] := SaturateU16(b[31:0]) +dst[95:80] := SaturateU16(b[63:32]) +dst[111:96] := SaturateU16(b[95:64]) +dst[127:112] := SaturateU16(b[127:96]) + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + l := j*16 + dst[l+15:l] := SignExtend16(a[i+7:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 8*j + dst[i+31:i] := SignExtend32(a[k+7:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1
+ i := 64*j
+ k := 8*j
+ dst[i+63:i] := SignExtend64(a[k+7:k])
+ENDFOR
+ + SSE4.1 + &#13;
smmintrin.h
+ Convert +
+ + + + Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 16*j + dst[i+31:i] := SignExtend32(a[k+15:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 16*j + dst[i+63:i] := SignExtend64(a[k+15:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[i+63:i] := SignExtend64(a[k+31:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + l := j*16 + dst[l+15:l] := ZeroExtend16(a[i+7:i]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 8*j + dst[i+31:i] := ZeroExtend32(a[k+7:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1
+ i := 64*j
+ k := 8*j
+ dst[i+63:i] := ZeroExtend64(a[k+7:k])
+ENDFOR
+ + SSE4.1 + &#13;
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". + +FOR j := 0 to 3 + i := 32*j + k := 16*j + dst[i+31:i] := ZeroExtend32(a[k+15:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 16*j + dst[i+63:i] := ZeroExtend64(a[k+15:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
+ + + + Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". + +FOR j := 0 to 1 + i := 64*j + k := 32*j + dst[i+63:i] := ZeroExtend64(a[k+31:k]) +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Convert +
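These widening conversions replace the SSE2 unpack-with-zero idiom shown earlier with a single instruction, and the signed/unsigned split makes the extension choice explicit. A sketch assuming the byte-to-word forms are _mm_cvtepu8_epi16 and _mm_cvtepi8_epi16:

    #include <smmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i bytes = _mm_setr_epi8(0, 1, 127, (char)255, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0);
        __m128i u16 = _mm_cvtepu8_epi16(bytes);  /* zero extend: 0xFF -> 255 */
        __m128i s16 = _mm_cvtepi8_epi16(bytes);  /* sign extend: 0xFF -> -1 */
        short u[8], s[8];
        _mm_storeu_si128((__m128i *)u, u16);
        _mm_storeu_si128((__m128i *)s, s16);
        printf("%d %d\n", u[3], s[3]);   /* 255 -1 */
        return 0;
    }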
+ + + + + Compare packed 64-bit integers in "a" and "b" for equality, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ( a[i+63:i] == b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE4.1 +
smmintrin.h
+ Compare +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "ZF" value. + +IF ((a[127:0] AND b[127:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[127:0]) AND b[127:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +RETURN ZF + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "CF" value. + +IF ((a[127:0] AND b[127:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[127:0]) AND b[127:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +RETURN CF + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +IF ((a[127:0] AND b[127:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[127:0]) AND b[127:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "mask", and return 1 if the result is zero, otherwise return 0. + +IF ((a[127:0] AND mask[127:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +dst := ZF + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + + Compute the bitwise AND of 128 bits (representing integer data) in "a" and "mask", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "mask", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. + +IF ((a[127:0] AND mask[127:0]) == 0) + ZF := 1 +ELSE + ZF := 0 +FI +IF (((NOT a[127:0]) AND mask[127:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +IF (ZF == 0 && CF == 0) + dst := 1 +ELSE + dst := 0 +FI + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + Compute the bitwise NOT of "a" and then AND with a 128-bit vector containing all 1's, and return 1 if the result is zero, otherwise return 0. + +FOR j := 0 to 127 + tmp[j] := 1 +ENDFOR +IF (((NOT a[127:0]) AND tmp[127:0]) == 0) + CF := 1 +ELSE + CF := 0 +FI +dst := CF + + + + SSE4.1 +
smmintrin.h
+ Logical +
+ + + + Horizontally compute the minimum amongst the packed unsigned 16-bit integers in "a", store the minimum and index in "dst", and zero the remaining bits in "dst". + +index[2:0] := 0 +min[15:0] := a[15:0] +FOR j := 0 to 7 + i := j*16 + IF a[i+15:i] < min[15:0] + index[2:0] := j + min[15:0] := a[i+15:i] + FI +ENDFOR +dst[15:0] := min[15:0] +dst[18:16] := index[2:0] +dst[127:19] := 0 + + + SSE4.1 +
smmintrin.h
+ Miscellaneous +
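The horizontal minimum packs both the value and its position into the low lanes, with the 3-bit index at bits [18:16]. A sketch assuming the intrinsic is _mm_minpos_epu16, read back with _mm_extract_epi16:

    #include <smmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i v = _mm_setr_epi16(9, 4, 7, 4, 8, 6, 5, 3);
        __m128i r = _mm_minpos_epu16(v);
        /* lane 0 holds the minimum, bits [18:16] its index */
        int min = _mm_extract_epi16(r, 0);
        int idx = _mm_extract_epi16(r, 1) & 0x7;
        printf("min=%d at index %d\n", min, idx);   /* min=3 at index 7 */
        return 0;
    }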
+ + + + Load 128-bits of integer data from memory into "dst" using a non-temporal memory hint. + "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. + +dst[127:0] := MEM[mem_addr+127:mem_addr] + + + SSE4.1 +
smmintrin.h
+ Load +
+ + + + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and store the generated mask in "dst". + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +bInvalid := 0 +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + IF bInvalid // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +IF imm8[6] // byte / word mask + FOR i := 0 to UpperBound + j := i*size + IF IntRes2[i] + dst[j+size-1:j] := (imm8[0] ? 0xFF : 0xFFFF) + ELSE + dst[j+size-1:j] := 0 + FI + ENDFOR +ELSE // bit mask + dst[UpperBound:0] := IntRes2[UpperBound:0] + dst[127:UpperBound+1] := 0 +FI + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and store the generated index in "dst". + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +bInvalid := 0 +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + IF bInvalid // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +IF imm8[6] // most significant bit + tmp := UpperBound + dst := tmp + DO WHILE ((tmp >= 0) AND a[tmp] == 0) + tmp := tmp - 1 + dst := tmp + OD +ELSE // least significant bit + tmp := 0 + dst := tmp + DO WHILE ((tmp <= UpperBound) AND a[tmp] == 0) + tmp := tmp + 1 + dst := tmp + OD +FI + + + SSE4.2 +
nmmintrin.h
+ String Compare +
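With the "ranges" aggregation (imm8[3:2] = 1), the index form scans one 16-byte block for the first character falling inside the given ranges and returns 16 when nothing matches. A sketch assuming the intrinsic is _mm_cmpistri with the range pairs in the first operand, using the _SIDD_* control macros (compiled with SSE4.2 enabled, e.g. -msse4.2):

    #include <nmmintrin.h>
    #include <stdio.h>

    int main(void) {
        /* find the first decimal digit in one NUL-padded 16-byte chunk */
        char buf[16] = {'p', 'r', 'i', 'c', 'e', ':', ' ', '4', '2', 0};
        __m128i chunk = _mm_loadu_si128((const __m128i *)buf);
        __m128i digits = _mm_setr_epi8('0', '9', 0, 0, 0, 0, 0, 0,
                                       0, 0, 0, 0, 0, 0, 0, 0);
        int idx = _mm_cmpistri(digits, chunk,
                               _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES |
                               _SIDD_LEAST_SIGNIFICANT);
        printf("first digit at %d\n", idx);   /* 7 */
        return 0;
    }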
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if any character in "b" was null, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +bInvalid := 0 +FOR j := 0 to UpperBound + n := j*size + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI +ENDFOR +dst := bInvalid + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if the resulting mask was non-zero, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +bInvalid := 0 +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + IF bInvalid // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := (IntRes2 != 0) + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if any character in "a" was null, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +aInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI +ENDFOR +dst := aInvalid + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns bit 0 of the resulting bit mask. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +bInvalid := 0 +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + IF bInvalid // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := IntRes2[0] + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if "b" did not contain a null character and the resulting mask was zero, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF a[m+size-1:m] == 0 + aInvalid := 1 + FI + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +bInvalid := 0 +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF b[n+size-1:n] == 0 + bInvalid := 1 + FI + IF bInvalid // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := (IntRes2 == 0) AND bInvalid + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and store the generated mask in "dst". + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF i == la + aInvalid := 1 + FI + IF j == lb + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF i >= lb // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +IF imm8[6] // byte / word mask + FOR i := 0 to UpperBound + j := i*size + IF IntRes2[i] + dst[j+size-1:j] := (imm8[0] ? 0xFF : 0xFFFF) + ELSE + dst[j+size-1:j] := 0 + FI + ENDFOR +ELSE // bit mask + dst[UpperBound:0] := IntRes2[UpperBound:0] + dst[127:UpperBound+1] := 0 +FI + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and store the generated index in "dst". + [strcmp_note] + 
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+    m := i*size
+    FOR j := 0 to UpperBound
+        n := j*size
+        BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+        
+        // invalidate characters after EOS
+        IF i == la
+            aInvalid := 1
+        FI
+        IF j == lb
+            bInvalid := 1
+        FI
+        
+        // override comparisons for invalid characters
+        CASE (imm8[3:2]) OF
+        0: // equal any
+            IF (!aInvalid && bInvalid)
+                BoolRes.word[i].bit[j] := 0
+            ELSE IF (aInvalid && !bInvalid)
+                BoolRes.word[i].bit[j] := 0
+            ELSE IF (aInvalid && bInvalid)
+                BoolRes.word[i].bit[j] := 0
+            FI
+        1: // ranges
+            IF (!aInvalid && bInvalid)
+                BoolRes.word[i].bit[j] := 0
+            ELSE IF (aInvalid && !bInvalid)
+                BoolRes.word[i].bit[j] := 0
+            ELSE IF (aInvalid && bInvalid)
+                BoolRes.word[i].bit[j] := 0
+            FI
+        2: // equal each
+            IF (!aInvalid && bInvalid)
+                BoolRes.word[i].bit[j] := 0
+            ELSE IF (aInvalid && !bInvalid)
+                BoolRes.word[i].bit[j] := 0
+            ELSE IF (aInvalid && bInvalid)
+                BoolRes.word[i].bit[j] := 1
+            FI
+        3: // equal ordered
+            IF (!aInvalid && bInvalid)
+                BoolRes.word[i].bit[j] := 0
+            ELSE IF (aInvalid && !bInvalid)
+                BoolRes.word[i].bit[j] := 1
+            ELSE IF (aInvalid && bInvalid)
+                BoolRes.word[i].bit[j] := 1
+            FI
+        ESAC
+    ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0: // equal any
+    IntRes1 := 0
+    FOR i := 0 to UpperBound
+        FOR j := 0 to UpperBound
+            IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+        ENDFOR
+    ENDFOR
+1: // ranges
+    IntRes1 := 0
+    FOR i := 0 to UpperBound
+        FOR j := 0 to UpperBound
+            IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+            j += 2
+        ENDFOR
+    ENDFOR
+2: // equal each
+    IntRes1 := 0
+    FOR i := 0 to UpperBound
+        IntRes1[i] := BoolRes.word[i].bit[i]
+    ENDFOR
+3: // equal ordered
+    IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+    FOR i := 0 to UpperBound
+        k := i
+        FOR j := 0 to UpperBound-i
+            IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+            k := k+1
+        ENDFOR
+    ENDFOR
+ESAC
+// optionally negate results
+FOR i := 0 to UpperBound
+    IF imm8[4]
+        IF imm8[5] // only negate valid
+            IF i >= lb // invalid, don't negate
+                IntRes2[i] := IntRes1[i]
+            ELSE // valid, negate
+                IntRes2[i] := -1 XOR IntRes1[i]
+            FI
+        ELSE // negate all
+            IntRes2[i] := -1 XOR IntRes1[i]
+        FI
+    ELSE // don't negate
+        IntRes2[i] := IntRes1[i]
+    FI
+ENDFOR
+// output
+IF imm8[6] // most significant bit
+    tmp := UpperBound
+    dst := tmp
+    DO WHILE ((tmp >= 0) AND IntRes2[tmp] == 0)
+        tmp := tmp - 1
+        dst := tmp
+    OD
+ELSE // least significant bit
+    tmp := 0
+    dst := tmp
+    DO WHILE ((tmp <= UpperBound) AND IntRes2[tmp] == 0)
+        tmp := tmp + 1
+        dst := tmp
+    OD
+FI
+
+
+ SSE4.2
+
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if any character in "b" was null, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +dst := (lb <= UpperBound) + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if the resulting mask was non-zero, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF i == la + aInvalid := 1 + FI + IF j == lb + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF i >= lb // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := (IntRes2 != 0) + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if any character in "a" was null, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +dst := (la <= UpperBound) + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns bit 0 of the resulting bit mask. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF i == la + aInvalid := 1 + FI + IF j == lb + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF i >= lb // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := IntRes2[0] + + + SSE4.2 +
nmmintrin.h
+ String Compare +
+ + + + + + + + Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if "b" did not contain a null character and the resulting mask was zero, and 0 otherwise. + [strcmp_note] + +size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters +UpperBound := (128 / size) - 1 +BoolRes := 0 +// compare all characters +aInvalid := 0 +bInvalid := 0 +FOR i := 0 to UpperBound + m := i*size + FOR j := 0 to UpperBound + n := j*size + BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 + + // invalidate characters after EOS + IF i == la + aInvalid := 1 + FI + IF j == lb + bInvalid := 1 + FI + + // override comparisons for invalid characters + CASE (imm8[3:2]) OF + 0: // equal any + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 1: // ranges + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + FI + 2: // equal each + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + 3: // equal ordered + IF (!aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 0 + ELSE IF (aInvalid && !bInvalid) + BoolRes.word[i].bit[j] := 1 + ELSE IF (aInvalid && bInvalid) + BoolRes.word[i].bit[j] := 1 + FI + ESAC + ENDFOR +ENDFOR +// aggregate results +CASE (imm8[3:2]) OF +0: // equal any + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] + ENDFOR + ENDFOR +1: // ranges + IntRes1 := 0 + FOR i := 0 to UpperBound + FOR j := 0 to UpperBound + IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) + j += 2 + ENDFOR + ENDFOR +2: // equal each + IntRes1 := 0 + FOR i := 0 to UpperBound + IntRes1[i] := BoolRes.word[i].bit[i] + ENDFOR +3: // equal ordered + IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) + FOR i := 0 to UpperBound + k := i + FOR j := 0 to UpperBound-i + IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] + k := k+1 + ENDFOR + ENDFOR +ESAC +// optionally negate results +FOR i := 0 to UpperBound + IF imm8[4] + IF imm8[5] // only negate valid + IF i >= lb // invalid, don't negate + IntRes2[i] := IntRes1[i] + ELSE // valid, negate + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // negate all + IntRes2[i] := -1 XOR IntRes1[i] + FI + ELSE // don't negate + IntRes2[i] := IntRes1[i] + FI +ENDFOR +// output +dst := (IntRes2 == 0) AND (lb > UpperBound) + + + SSE4.2 +
nmmintrin.h
+ String Compare +
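The imm8 control plumbing above is easier to see in use. A minimal sketch, assuming unsigned byte mode with the "equal any" aggregation (the function name, the vowel set, and the requirement that block16 point at 16 readable bytes are illustrative, not taken from the data above); _mm_cmpistri returns the index of the first byte of the text that matches any byte of the set, or 16 when nothing matches:

    #include <nmmintrin.h>

    // Index of the first vowel in a 16-byte block, or 16 if none is found.
    int first_vowel_index(const char *block16) {
        __m128i set  = _mm_loadu_si128((const __m128i *)"aeiou\0\0\0\0\0\0\0\0\0\0");
        __m128i text = _mm_loadu_si128((const __m128i *)block16);
        return _mm_cmpistri(set, text,
                            _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
    }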
+ + + + + Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in "dst". + +FOR j := 0 to 1 + i := j*64 + dst[i+63:i] := ( a[i+63:i] > b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 +ENDFOR + + + SSE4.2 +
nmmintrin.h
+ Compare +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 8-bit integer "v", and stores the result in "dst". + tmp1[7:0] := v[0:7] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[39:0] := tmp1[7:0] << 32 +tmp4[39:0] := tmp2[31:0] << 8 +tmp5[39:0] := tmp3[39:0] XOR tmp4[39:0] +tmp6[31:0] := MOD2(tmp5[39:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + SSE4.2 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 16-bit integer "v", and stores the result in "dst". + tmp1[15:0] := v[0:15] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[47:0] := tmp1[15:0] << 32 +tmp4[47:0] := tmp2[31:0] << 16 +tmp5[47:0] := tmp3[47:0] XOR tmp4[47:0] +tmp6[31:0] := MOD2(tmp5[47:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + SSE4.2 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 32-bit integer "v", and stores the result in "dst". + tmp1[31:0] := v[0:31] // bit reflection +tmp2[31:0] := crc[0:31] // bit reflection +tmp3[63:0] := tmp1[31:0] << 32 +tmp4[63:0] := tmp2[31:0] << 32 +tmp5[63:0] := tmp3[63:0] XOR tmp4[63:0] +tmp6[31:0] := MOD2(tmp5[63:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 +dst[31:0] := tmp6[0:31] // bit reflection + + + SSE4.2 +
nmmintrin.h
+ Cryptography +
+ + + + + Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 64-bit integer "v", and stores the result in "dst". + tmp1[63:0] := v[0:63] // bit reflection
+tmp2[31:0] := crc[0:31] // bit reflection
+tmp3[95:0] := tmp1[63:0] << 32
+tmp4[95:0] := tmp2[31:0] << 64
+tmp5[95:0] := tmp3[95:0] XOR tmp4[95:0]
+tmp6[31:0] := MOD2(tmp5[95:0], 0x11EDC6F41) // remainder from polynomial division modulus 2
+dst[31:0] := tmp6[0:31] // bit reflection
+
+
+ SSE4.2
+
nmmintrin.h
+ Cryptography +
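Taken together, the four widths above accumulate a CRC-32C checksum (Castagnoli polynomial 0x11EDC6F41), which is not the zlib CRC-32. A minimal buffer-checksum sketch; the helper name and the 0xFFFFFFFF pre/post conditioning are the usual CRC-32C conventions, not part of the definitions above:

    #include <nmmintrin.h>
    #include <stddef.h>
    #include <stdint.h>

    uint32_t crc32c(const uint8_t *data, size_t len) {
        uint32_t crc = 0xFFFFFFFFu;            // conventional initial value
        for (size_t i = 0; i < len; i++)
            crc = _mm_crc32_u8(crc, data[i]);  // fold in one byte per step
        return crc ^ 0xFFFFFFFFu;              // conventional final inversion
    }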
+ + + + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 7 + i := j*8 + dst[i+7:i] := ABS(Int(a[i+7:i])) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 15 + i := j*8 + dst[i+7:i] := ABS(a[i+7:i]) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := ABS(Int(a[i+15:i])) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := ABS(a[i+15:i]) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 1 + i := j*32 + dst[i+31:i] := ABS(a[i+31:i]) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". + +FOR j := 0 to 3 + i := j*32 + dst[i+31:i] := ABS(a[i+31:i]) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Special Math Functions +
+ + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". + +FOR j := 0 to 15 + i := j*8 + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[3:0] := b[i+3:i] + dst[i+7:i] := a[index*8+7:index*8] + FI +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Swizzle +
+ + + + + Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". + +FOR j := 0 to 7 + i := j*8 + IF b[i+7] == 1 + dst[i+7:i] := 0 + ELSE + index[2:0] := b[i+2:i] + dst[i+7:i] := a[index*8+7:index*8] + FI +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Swizzle +
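A common use of the byte shuffle above is a fixed permutation; a small sketch (the reversal mask is an arbitrary example). Each control byte selects one source byte, and a control byte with its high bit set writes zero instead:

    #include <tmmintrin.h>

    // Reverse the 16 bytes of v: control byte k selects source byte 15-k.
    __m128i reverse_bytes(__m128i v) {
        const __m128i rev = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8,
                                          7, 6, 5, 4, 3, 2, 1, 0);
        return _mm_shuffle_epi8(v, rev);
    }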
+ + + + + + Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". + +tmp[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) +dst[127:0] := tmp[127:0] + + + SSSE3 +
tmmintrin.h
+ Miscellaneous +
+ + + + + + Concatenate 8-byte blocks in "a" and "b" into a 16-byte temporary result, shift the result right by "imm8" bytes, and store the low 8 bytes in "dst". +
+tmp[127:0] := ((a[63:0] << 64)[127:0] OR b[63:0]) >> (imm8*8)
+dst[63:0] := tmp[63:0]
+
+
+ SSSE3
+
tmmintrin.h
+ Miscellaneous +
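The concatenate-and-shift form above is what makes these instructions useful for sliding a window over unaligned data; a small sketch (the 4-byte offset is arbitrary):

    #include <tmmintrin.h>

    // Bytes 4..19 of the 32-byte concatenation a:b (a supplies the high block).
    __m128i window_at_4(__m128i a, __m128i b) {
        return _mm_alignr_epi8(a, b, 4);
    }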
+ + + + + Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[31:16] + a[15:0] +dst[31:16] := a[63:48] + a[47:32] +dst[47:32] := a[95:80] + a[79:64] +dst[63:48] := a[127:112] + a[111:96] +dst[79:64] := b[31:16] + b[15:0] +dst[95:80] := b[63:48] + b[47:32] +dst[111:96] := b[95:80] + b[79:64] +dst[127:112] := b[127:112] + b[111:96] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[31:16] + a[15:0]) +dst[31:16] := Saturate16(a[63:48] + a[47:32]) +dst[47:32] := Saturate16(a[95:80] + a[79:64]) +dst[63:48] := Saturate16(a[127:112] + a[111:96]) +dst[79:64] := Saturate16(b[31:16] + b[15:0]) +dst[95:80] := Saturate16(b[63:48] + b[47:32]) +dst[111:96] := Saturate16(b[95:80] + b[79:64]) +dst[127:112] := Saturate16(b[127:112] + b[111:96]) + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[63:32] + a[31:0] +dst[63:32] := a[127:96] + a[95:64] +dst[95:64] := b[63:32] + b[31:0] +dst[127:96] := b[127:96] + b[95:64] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
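The pairwise adds above chain naturally into full horizontal reductions; a small sketch summing all four 32-bit lanes (the helper name is illustrative):

    #include <tmmintrin.h>

    // Total of the four 32-bit lanes of v via two pairwise-add steps.
    int hsum_epi32(__m128i v) {
        __m128i s = _mm_hadd_epi32(v, v);   // [a0+a1, a2+a3, a0+a1, a2+a3]
        s = _mm_hadd_epi32(s, s);           // every lane now holds the total
        return _mm_cvtsi128_si32(s);
    }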
+ + + + + Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[31:16] + a[15:0] +dst[31:16] := a[63:48] + a[47:32] +dst[47:32] := b[31:16] + b[15:0] +dst[63:48] := b[63:48] + b[47:32] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[63:32] + a[31:0] +dst[63:32] := b[63:32] + b[31:0] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[31:16] + a[15:0]) +dst[31:16] := Saturate16(a[63:48] + a[47:32]) +dst[47:32] := Saturate16(b[31:16] + b[15:0]) +dst[63:48] := Saturate16(b[63:48] + b[47:32]) + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[15:0] - a[31:16] +dst[31:16] := a[47:32] - a[63:48] +dst[47:32] := a[79:64] - a[95:80] +dst[63:48] := a[111:96] - a[127:112] +dst[79:64] := b[15:0] - b[31:16] +dst[95:80] := b[47:32] - b[63:48] +dst[111:96] := b[79:64] - b[95:80] +dst[127:112] := b[111:96] - b[127:112] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[15:0] - a[31:16]) +dst[31:16] := Saturate16(a[47:32] - a[63:48]) +dst[47:32] := Saturate16(a[79:64] - a[95:80]) +dst[63:48] := Saturate16(a[111:96] - a[127:112]) +dst[79:64] := Saturate16(b[15:0] - b[31:16]) +dst[95:80] := Saturate16(b[47:32] - b[63:48]) +dst[111:96] := Saturate16(b[79:64] - b[95:80]) +dst[127:112] := Saturate16(b[111:96] - b[127:112]) + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[31:0] - a[63:32] +dst[63:32] := a[95:64] - a[127:96] +dst[95:64] := b[31:0] - b[63:32] +dst[127:96] := b[95:64] - b[127:96] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". + +dst[15:0] := a[15:0] - a[31:16] +dst[31:16] := a[47:32] - a[63:48] +dst[47:32] := b[15:0] - b[31:16] +dst[63:48] := b[47:32] - b[63:48] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". + +dst[31:0] := a[31:0] - a[63:32] +dst[63:32] := b[31:0] - b[63:32] + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". + +dst[15:0] := Saturate16(a[15:0] - a[31:16]) +dst[31:16] := Saturate16(a[47:32] - a[63:48]) +dst[47:32] := Saturate16(b[15:0] - b[31:16]) +dst[63:48] := Saturate16(b[47:32] - b[63:48]) + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". + +FOR j := 0 to 7 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". + +FOR j := 0 to 3 + i := j*16 + dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
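The unsigned-by-signed pairwise form above is the workhorse of byte-wise filters; a minimal sketch (the names are illustrative): unsigned 8-bit samples times signed 8-bit weights, with adjacent products summed into saturated 16-bit lanes:

    #include <tmmintrin.h>

    // One convolution step: pairwise (pixel * weight) sums, saturated to i16.
    __m128i weighted_pairs(__m128i pixels_u8, __m128i weights_s8) {
        return _mm_maddubs_epi16(pixels_u8, weights_s8);
    }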
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". + +FOR j := 0 to 7 + i := j*16 + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
+ + + + + Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". + +FOR j := 0 to 3 + i := j*16 + tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 + dst[i+15:i] := tmp[16:1] +ENDFOR + + + SSSE3 +
tmmintrin.h
+ Arithmetic +
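The truncate/round/select-bits wording above is exactly a rounded Q15 fixed-point multiply, i.e. (a*b + 0x4000) >> 15. A worked sketch (the values are chosen for illustration): 0.5 * 0.25 in Q15 yields 0.125:

    #include <stdint.h>
    #include <tmmintrin.h>

    int16_t q15_half_times_quarter(void) {
        __m128i a = _mm_set1_epi16(0x4000);       // 0.5 in Q15
        __m128i b = _mm_set1_epi16(0x2000);       // 0.25 in Q15
        __m128i r = _mm_mulhrs_epi16(a, b);       // ((a*b >> 14) + 1) >> 1
        return (int16_t)_mm_extract_epi16(r, 0);  // 0x1000 == 0.125 in Q15
    }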
+ + + + + Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. +
+FOR j := 0 to 15
+    i := j*8
+    IF b[i+7:i] < 0
+        dst[i+7:i] := -(a[i+7:i])
+    ELSE IF b[i+7:i] == 0
+        dst[i+7:i] := 0
+    ELSE
+        dst[i+7:i] := a[i+7:i]
+    FI
+ENDFOR
+
+
+ SSSE3
+
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. +
+FOR j := 0 to 7
+    i := j*16
+    IF b[i+15:i] < 0
+        dst[i+15:i] := -(a[i+15:i])
+    ELSE IF b[i+15:i] == 0
+        dst[i+15:i] := 0
+    ELSE
+        dst[i+15:i] := a[i+15:i]
+    FI
+ENDFOR
+
+
+ SSSE3
+
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. +
+FOR j := 0 to 3
+    i := j*32
+    IF b[i+31:i] < 0
+        dst[i+31:i] := -(a[i+31:i])
+    ELSE IF b[i+31:i] == 0
+        dst[i+31:i] := 0
+    ELSE
+        dst[i+31:i] := a[i+31:i]
+    FI
+ENDFOR
+
+
+ SSSE3
+
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. +
+FOR j := 0 to 7
+    i := j*8
+    IF b[i+7:i] < 0
+        dst[i+7:i] := -(a[i+7:i])
+    ELSE IF b[i+7:i] == 0
+        dst[i+7:i] := 0
+    ELSE
+        dst[i+7:i] := a[i+7:i]
+    FI
+ENDFOR
+
+
+ SSSE3
+
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. +
+FOR j := 0 to 3
+    i := j*16
+    IF b[i+15:i] < 0
+        dst[i+15:i] := -(a[i+15:i])
+    ELSE IF b[i+15:i] == 0
+        dst[i+15:i] := 0
+    ELSE
+        dst[i+15:i] := a[i+15:i]
+    FI
+ENDFOR
+
+
+ SSSE3
+
tmmintrin.h
+ Arithmetic +
+ + + + + Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. +
+FOR j := 0 to 1
+    i := j*32
+    IF b[i+31:i] < 0
+        dst[i+31:i] := -(a[i+31:i])
+    ELSE IF b[i+31:i] == 0
+        dst[i+31:i] := 0
+    ELSE
+        dst[i+31:i] := a[i+31:i]
+    FI
+ENDFOR
+
+
+ SSSE3
+
tmmintrin.h
+ Arithmetic +
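The three-way select above (negate / zero / pass through) means one instruction applies a per-lane sign pattern; a minimal sketch:

    #include <tmmintrin.h>

    // Copy a with the signs of b: negated where b < 0, zeroed where b == 0.
    __m128i apply_signs(__m128i a, __m128i b) {
        return _mm_sign_epi16(a, b);
    }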
+ + + + + + Copy the current 64-bit value of the processor's time-stamp counter into "dst". + dst[63:0] := TimeStampCounter + + + TSC +
immintrin.h
+ General Support +
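A coarse timing sketch using the counter above (the helper name is illustrative; note that RDTSC is not a serializing instruction, so neighbouring work may still be in flight when it samples):

    #include <stdint.h>
    #include <immintrin.h>

    uint64_t cycles_elapsed(void (*fn)(void)) {
        uint64_t t0 = __rdtsc();
        fn();
        return __rdtsc() - t0;   // elapsed time-stamp counter ticks
    }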
+ + + + + Mark the start of a TSX (HLE/RTM) suspend load address tracking region. If this is used inside a transactional region, subsequent loads are not added to the read set of the transaction. If this is used inside a suspend load address tracking region it will cause transaction abort. If this is used outside of a transactional region it behaves like a NOP. + + TSXLDTRK +
immintrin.h
+ Miscellaneous +
+ + + Mark the end of a TSX (HLE/RTM) suspend load address tracking region. If this is used inside a suspend load address tracking region it will end the suspend region and all following load addresses will be added to the transaction read set. If this is used inside an active transaction but not in a suspend region it will cause transaction abort. If this is used outside of a transactional region it behaves like a NOP. + + TSXLDTRK +
immintrin.h
+ Miscellaneous +
+ + + + + + Clear the user interrupt flag (UIF). + + UINTR +
immintrin.h
+ General Support +
+ + + + Send user interprocessor interrupts specified in unsigned 64-bit integer "__a". + + UINTR +
immintrin.h
+ General Support +
+ + + + Sets the user interrupt flag (UIF). + + UINTR +
immintrin.h
+ General Support +
+ + + + Store the current user interrupt flag (UIF) in unsigned 8-bit integer "dst". + + UINTR +
immintrin.h
+ General Support +
+ + + + + Reads the contents of a 64-bit MSR specified in "__A" into "dst". + DEST := MSR[__A] + + + USER_MSR +
x86gprintrin.h
+ General Support +
+ + + + + Writes the contents of "__B" into the 64-bit MSR specified in "__A". + MSR[__A] := __B + + + USER_MSR +
x86gprintrin.h
+ General Support +
+ + + + + Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". + FOR j := 0 to 1
+    i := j*128
+    a[i+127:i] := ShiftRows(a[i+127:i])
+    a[i+127:i] := SubBytes(a[i+127:i])
+    dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
+ENDFOR
+dst[MAX:256] := 0
+
+
+ VAES
+ AVX512VL
+
immintrin.h
+ Cryptography +
+ + + + + Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". + FOR j := 0 to 1
+    i := j*128
+    a[i+127:i] := ShiftRows(a[i+127:i])
+    a[i+127:i] := SubBytes(a[i+127:i])
+    a[i+127:i] := MixColumns(a[i+127:i])
+    dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
+ENDFOR
+dst[MAX:256] := 0
+
+
+ VAES
+ AVX512VL
+
immintrin.h
+ Cryptography +
+ + + + + Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". + FOR j := 0 to 1 + i := j*128 + a[i+127:i] := InvShiftRows(a[i+127:i]) + a[i+127:i] := InvSubBytes(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:256] := 0 + + + VAES + AVX512VL +
immintrin.h
+ Cryptography +
+ + + + + Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". + FOR j := 0 to 1 + i := j*128 + a[i+127:i] := InvShiftRows(a[i+127:i]) + a[i+127:i] := InvSubBytes(a[i+127:i]) + a[i+127:i] := InvMixColumns(a[i+127:i]) + dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] +ENDFOR +dst[MAX:256] := 0 + + + VAES + AVX512VL +
immintrin.h
+ Cryptography +
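The per-128-bit-lane wording above means each half of the 256-bit state runs an independent AES round; a minimal two-block sketch (key-schedule handling is assumed to happen elsewhere):

    #include <immintrin.h>

    // One AES encryption round applied to two independent 128-bit blocks.
    __m256i aes_round_x2(__m256i two_states, __m256i two_round_keys) {
        return _mm256_aesenc_epi128(two_states, two_round_keys);
    }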
+ + + + + + + + Carry-less multiplication of one quadword of + 'b' by one quadword of 'c', stores + the 128-bit result in 'dst'. The immediate 'Imm8' is + used to determine which quadwords of 'b' + and 'c' should be used. + +DEFINE PCLMUL128(X,Y) { + FOR i := 0 to 63 + TMP[i] := X[ 0 ] and Y[ i ] + FOR j := 1 to i + TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ]) + ENDFOR + DEST[ i ] := TMP[ i ] + ENDFOR + FOR i := 64 to 126 + TMP[i] := 0 + FOR j := i - 63 to 63 + TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ]) + ENDFOR + DEST[ i ] := TMP[ i ] + ENDFOR + DEST[127] := 0 + RETURN DEST // 128b vector +} +FOR i := 0 to 1 + IF Imm8[0] == 0 + TEMP1 := b.m128[i].qword[0] + ELSE + TEMP1 := b.m128[i].qword[1] + FI + IF Imm8[4] == 0 + TEMP2 := c.m128[i].qword[0] + ELSE + TEMP2 := c.m128[i].qword[1] + FI + dst.m128[i] := PCLMUL128(TEMP1, TEMP2) +ENDFOR +dst[MAX:256] := 0 + + + VPCLMULQDQ + AVX512VL +
immintrin.h
+ Application-Targeted +
+ + + + + + + + Carry-less multiplication of one quadword of + 'b' by one quadword of 'c', stores + the 128-bit result in 'dst'. The immediate 'Imm8' is + used to determine which quadwords of 'b' + and 'c' should be used. + +DEFINE PCLMUL128(X,Y) { + FOR i := 0 to 63 + TMP[i] := X[ 0 ] and Y[ i ] + FOR j := 1 to i + TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ]) + ENDFOR + DEST[ i ] := TMP[ i ] + ENDFOR + FOR i := 64 to 126 + TMP[i] := 0 + FOR j := i - 63 to 63 + TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ]) + ENDFOR + DEST[ i ] := TMP[ i ] + ENDFOR + DEST[127] := 0 + RETURN DEST // 128b vector +} +FOR i := 0 to 3 + IF Imm8[0] == 0 + TEMP1 := b.m128[i].qword[0] + ELSE + TEMP1 := b.m128[i].qword[1] + FI + IF Imm8[4] == 0 + TEMP2 := c.m128[i].qword[0] + ELSE + TEMP2 := c.m128[i].qword[1] + FI + dst.m128[i] := PCLMUL128(TEMP1, TEMP2) +ENDFOR +dst[MAX:512] := 0 + + + VPCLMULQDQ +
immintrin.h
+ Application-Targeted +
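In GF(2)[x] terms, the PCLMUL128 helper above multiplies polynomials without carries: for example 0b11 * 0b11 = 0b101, since (x+1)(x+1) = x^2+1 over GF(2). A minimal selector sketch (imm8 = 0x00 picks the low qword of each source in every 128-bit lane):

    #include <immintrin.h>

    // Carry-less multiply of the low qwords of b and c, per 128-bit lane.
    __m256i clmul_low_qwords(__m256i b, __m256i c) {
        return _mm256_clmulepi64_epi128(b, c, 0x00);
    }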
+ + + + + + + Directs the processor to enter an implementation-dependent optimized state until the TSC reaches or exceeds the value specified in "counter". Bit 0 of "ctrl" selects between a lower power (cleared) or faster wakeup (set) optimized state. Returns the carry flag (CF). If the processor that executed a UMWAIT instruction wakes due to the expiration of the operating system time limit, the instruction sets RFLAGS.CF; otherwise, that flag is cleared.
+
+ WAITPKG
+
immintrin.h
+ Miscellaneous +
+ + + + + Directs the processor to enter an implementation-dependent optimized state while monitoring a range of addresses. The instruction wakes up when the TSC reaches or exceeds the value specified in "counter" (if the monitoring hardware did not trigger beforehand). Bit 0 of "ctrl" selects between a lower power (cleared) or faster wakeup (set) optimized state. Returns the carry flag (CF). If the processor that executed a UMWAIT instruction wakes due to the expiration of the operating system time limit, the instruction sets RFLAGS.CF; otherwise, that flag is cleared.
+
+ WAITPKG
+
immintrin.h
+ Miscellaneous +
+ + + + Sets up a linear address range to be + monitored by hardware and activates the + monitor. The address range should be a writeback + memory caching type. The address is + contained in "a". + + WAITPKG +
immintrin.h
+ Miscellaneous +
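A typical pairing of the two instructions above is a low-power wait on a flag; a hedged sketch (the deadline value and the re-check against lost wakeups are usage conventions, not part of the definitions above):

    #include <stdint.h>
    #include <immintrin.h>

    int wait_for_flag(volatile uint32_t *flag, uint64_t deadline_tsc) {
        while (*flag == 0) {
            _umonitor((void *)flag);     // arm the monitor on the flag's cache line
            if (*flag != 0)
                break;                   // re-check so a store is not slept past
            _umwait(0, deadline_tsc);    // ctrl bit 0 = 0: lower-power state
        }
        return *flag != 0;
    }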
+ + + + + + Write back and do not flush internal caches. + Initiate writing-back without flushing of external + caches. + + WBNOINVD +
immintrin.h
+ Miscellaneous +
+ + + + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsavec differs from xsave in that it uses compaction and that it may use init optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSAVEC +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsavec differs from xsave in that it uses compaction and that it may use init optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSAVEC +
immintrin.h
+ OS-Targeted +
+ + + + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. The hardware may optimize the manner in which data is saved. The performance of this instruction will be equal to or better than using the XSAVE instruction. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + 2: mem_addr.EXT_SAVE_Area2[YMM] := ProcessorState[YMM] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSAVEOPT +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. The hardware may optimize the manner in which data is saved. The performance of this instruction will be equal to or better than using the XSAVE64 instruction. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + 2: mem_addr.EXT_SAVE_Area2[YMM] := ProcessorState[YMM] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSAVEOPT +
immintrin.h
+ OS-Targeted +
+ + + + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsaves differs from xsave in that it can save state components corresponding to bits set in IA32_XSS MSR and that it may use the modified optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSS +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsaves differs from xsave in that it can save state components corresponding to bits set in IA32_XSS MSR and that it may use the modified optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE + XSS +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". xrstors differs from xrstor in that it can restore state components corresponding to bits set in the IA32_XSS MSR; xrstors cannot restore from an xsave area in which the extended region is in the standard form. State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. + st_mask := mem_addr.HEADER.XSTATE_BV[62:0] +FOR i := 0 to 62 + IF (rs_mask[i] AND XCR0[i]) + IF st_mask[i] + CASE (i) OF + 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] + 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] + DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] + ESAC + ELSE + // ProcessorExtendedState := Processor Supplied Values + CASE (i) OF + 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] + ESAC + FI + FI + i := i + 1 +ENDFOR + + + XSAVE + XSS +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". xrstors differs from xrstor in that it can restore state components corresponding to bits set in the IA32_XSS MSR; xrstors cannot restore from an xsave area in which the extended region is in the standard form. State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. + st_mask := mem_addr.HEADER.XSTATE_BV[62:0] +FOR i := 0 to 62 + IF (rs_mask[i] AND XCR0[i]) + IF st_mask[i] + CASE (i) OF + 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] + 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] + DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] + ESAC + ELSE + // ProcessorExtendedState := Processor Supplied Values + CASE (i) OF + 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] + ESAC + FI + FI + i := i + 1 +ENDFOR + + + XSAVE + XSS +
immintrin.h
+ OS-Targeted +
+ + + + + + Copy up to 64-bits from the value of the extended control register (XCR) specified by "a" into "dst". Currently only XFEATURE_ENABLED_MASK XCR is supported. + dst[63:0] := XCR[a] + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. + st_mask := mem_addr.HEADER.XSTATE_BV[62:0] +FOR i := 0 to 62 + IF (rs_mask[i] AND XCR0[i]) + IF st_mask[i] + CASE (i) OF + 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] + 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] + DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] + ESAC + ELSE + // ProcessorExtendedState := Processor Supplied Values + CASE (i) OF + 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] + ESAC + FI + FI + i := i + 1 +ENDFOR + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. + st_mask := mem_addr.HEADER.XSTATE_BV[62:0] +FOR i := 0 to 62 + IF (rs_mask[i] AND XCR0[i]) + IF st_mask[i] + CASE (i) OF + 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] + 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] + DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] + ESAC + ELSE + // ProcessorExtendedState := Processor Supplied Values + CASE (i) OF + 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] + ESAC + FI + FI + i := i + 1 +ENDFOR + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + + + + Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. + mask[62:0] := save_mask[62:0] AND XCR0[62:0] +FOR i := 0 to 62 + IF mask[i] + CASE (i) OF + 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] + 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] + DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] + ESAC + mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] + FI + i := i + 1 +ENDFOR + + + XSAVE +
immintrin.h
+ OS-Targeted +
+ + + + + Copy 64-bits from "val" to the extended control register (XCR) specified by "a". Currently only XFEATURE_ENABLED_MASK XCR is supported. + +XCR[a] := val[63:0] + + + XSAVE +
immintrin.h
+ OS-Targeted +
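A minimal save/restore sketch for the family above (the 64-byte alignment is required by the definitions; the area size, which must come from CPUID leaf 0Dh, and the x87|SSE|AVX mask are illustrative assumptions):

    #include <immintrin.h>

    // Save, then restore, x87/SSE/AVX state through a standard-format XSAVE area.
    void roundtrip_state(void *area /* 64-byte aligned, CPUID-0Dh sized */) {
        _xsave(area, 0x7);    // bit 0: x87, bit 1: SSE, bit 2: AVX
        _xrstor(area, 0x7);
    }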
+ + + \ No newline at end of file From c18a90a57b2888f0460928996189ee3f5d4e10eb Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Tue, 5 Aug 2025 21:56:48 +0530 Subject: [PATCH 163/358] fix: update arch flags being sent to the x86 compilation command --- stdarch/crates/intrinsic-test/src/x86/compile.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdarch/crates/intrinsic-test/src/x86/compile.rs b/stdarch/crates/intrinsic-test/src/x86/compile.rs index e8c2262b8571f..8baf5815966ef 100644 --- a/stdarch/crates/intrinsic-test/src/x86/compile.rs +++ b/stdarch/crates/intrinsic-test/src/x86/compile.rs @@ -6,7 +6,7 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations let mut command = CompilationCommandBuilder::new() - .add_arch_flags(vec![ + .add_arch_flags([ "avx", "avx2", "avx512f", From c34348710af1ab25c5a2e3eee2bf77425c7152c9 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Tue, 5 Aug 2025 23:12:38 +0530 Subject: [PATCH 164/358] fix: set default value for varname and type fields of the parameters/return value of an intrinsic --- stdarch/crates/intrinsic-test/src/x86/xml_parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs index 0b422bddb5808..71788785efbe1 100644 --- a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs @@ -39,9 +39,9 @@ struct XMLIntrinsic { #[derive(Debug, PartialEq, Clone, Deserialize)] pub struct Parameter { - #[serde(rename = "@varname")] + #[serde(rename = "@varname", default)] pub var_name: String, - #[serde(rename = "@type")] + #[serde(rename = "@type", default)] pub type_data: String, #[serde(rename = "@etype", default)] pub etype: String, From 60d0f78ee62da8731760948766d803678c4bf75d Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Tue, 5 Aug 2025 23:28:46 +0530 Subject: [PATCH 165/358] fix: correcting semantical logic for setting vec_len --- stdarch/crates/intrinsic-test/src/x86/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index 542d1ad3fac80..17980798ea740 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -220,7 +220,7 @@ impl X86IntrinsicType { data.vec_len = match str::parse::(type_processed.as_str()) { // If bit_len is None, vec_len will be None. // Else vec_len will be (num_bits / bit_len). 
- Ok(num_bits) => data.bit_len.and(Some(num_bits / data.bit_len.unwrap())), + Ok(num_bits) => data.bit_len.and_then(|bit_len| Some(num_bits / bit_len)), Err(_) => None, }; } From 219109113e1f824ce36a3883be9c92bd09b68d1e Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Fri, 5 Sep 2025 14:11:38 +0530 Subject: [PATCH 166/358] fix: more support for Mask types --- .../src/common/intrinsic_helpers.rs | 5 +++-- stdarch/crates/intrinsic-test/src/x86/types.rs | 15 +++++++++------ .../crates/intrinsic-test/src/x86/xml_parser.rs | 16 +++++++++++++++- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs index 7bc1015a387c1..7a2a1ecdc9297 100644 --- a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -131,7 +131,7 @@ impl IntrinsicType { if let Some(bl) = self.bit_len { bl } else { - unreachable!("") + unreachable!("{:#?}", self) } } @@ -222,7 +222,8 @@ impl IntrinsicType { match self { IntrinsicType { bit_len: Some(bit_len @ (8 | 16 | 32 | 64)), - kind: kind @ (TypeKind::Int(_) | TypeKind::Poly | TypeKind::Char(_)), + kind: + kind @ (TypeKind::Int(_) | TypeKind::Poly | TypeKind::Char(_) | TypeKind::Mask), simd_len, vec_len, .. diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index 17980798ea740..cf1c56f04dd0c 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -5,6 +5,7 @@ use itertools::Itertools; use regex::Regex; use super::intrinsic::X86IntrinsicType; +use crate::common::argument::Argument; use crate::common::cli::Language; use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; use crate::x86::xml_parser::Parameter; @@ -18,7 +19,7 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { fn c_single_vector_type(&self) -> String { // matches __m128, __m256 and similar types - let re = Regex::new(r"\__m\d+\").unwrap(); + let re = Regex::new(r"__m\d+").unwrap(); if re.is_match(self.param.type_data.as_str()) { self.param.type_data.clone() } else { @@ -129,8 +130,6 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { impl X86IntrinsicType { fn from_c(s: &str) -> Result { let mut s_copy = s.to_string(); - let mut metadata: HashMap = HashMap::new(); - metadata.insert("type".to_string(), s.to_string()); s_copy = s_copy .replace("*", "") .replace("_", "") @@ -196,6 +195,9 @@ impl X86IntrinsicType { let mut etype_processed = param.etype.clone(); etype_processed.retain(|c| c.is_numeric()); + let mut type_processed = param.type_data.clone(); + type_processed.retain(|c| c.is_numeric()); + match str::parse::(etype_processed.as_str()) { Ok(value) => data.bit_len = Some(value), Err(_) => { @@ -209,14 +211,16 @@ impl X86IntrinsicType { } } + if param.type_data.matches("__mmask").next().is_some() { + data.bit_len = str::parse::(type_processed.as_str()).ok(); + } + // then check the param.type and extract numeric part if there are double // underscores. divide this number with bit-len and set this as simd-len. // Only __m types can have a simd-len. 
if param.type_data.matches("__m").next().is_some() && param.type_data.matches("__mmask").next().is_none() { - let mut type_processed = param.type_data.clone(); - type_processed.retain(|c| c.is_numeric()); data.vec_len = match str::parse::(type_processed.as_str()) { // If bit_len is None, vec_len will be None. // Else vec_len will be (num_bits / bit_len). @@ -235,7 +239,6 @@ impl X86IntrinsicType { // if param.etype == IMM, then it is a constant. // else it stays unchanged. data.constant |= param.etype == "IMM"; - Ok(X86IntrinsicType { data, param: param.clone(), diff --git a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs index 71788785efbe1..7465cb72d5967 100644 --- a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs @@ -3,6 +3,7 @@ use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use crate::x86::constraint::map_constraints; +use regex::Regex; use serde::{Deserialize, Deserializer}; use std::path::Path; @@ -96,11 +97,24 @@ fn xml_to_intrinsic( if args.iter().any(|elem| elem.is_none()) { return Err(Box::from("intrinsic isn't fully supported in this test!")); } - let args = args + let mut args = args .into_iter() .map(|e| e.unwrap()) .filter(|arg| arg.ty.ptr || arg.ty.kind != TypeKind::Void) .collect::>(); + + let mut args_test = args.iter(); + + // if one of the args has etype="MASK" and type="__md", + // then set the bit_len and vec_len accordingly + let re = Regex::new(r"__m\d+").unwrap(); + let is_mask = |arg: &Argument| arg.ty.param.etype.as_str() == "MASK"; + let is_vector = |arg: &Argument| re.is_match(arg.ty.param.type_data.as_str()); + let pos = args_test.position(|arg| is_mask(arg) && is_vector(arg)); + if let Some(index) = pos { + args[index].ty.bit_len = args[0].ty.bit_len; + } + let arguments = ArgumentList:: { args }; if let Err(message) = result { From 177e0044363113263ee1b80b57082709ce42a61e Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Sun, 7 Sep 2025 00:02:28 +0530 Subject: [PATCH 167/358] fix: remove unused imports --- stdarch/crates/intrinsic-test/src/x86/types.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index cf1c56f04dd0c..4d5a0a5b7c673 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -1,11 +1,9 @@ -use std::collections::HashMap; use std::str::FromStr; use itertools::Itertools; use regex::Regex; use super::intrinsic::X86IntrinsicType; -use crate::common::argument::Argument; use crate::common::cli::Language; use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; use crate::x86::xml_parser::Parameter; From 601845f2b27acba5bb3a425d668cd36d597d3b60 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Sun, 7 Sep 2025 16:33:05 +0530 Subject: [PATCH 168/358] feat: implemented print_result_c in the case the target type is Mask-based --- stdarch/crates/intrinsic-test/src/x86/intrinsic.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs b/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs index 169394b793640..a4de1e3bdb39f 100644 --- a/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs +++ b/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs @@ -83,6 +83,7 @@ impl IntrinsicDefinition for Intrinsic { TypeKind::Void => "void".to_string(), 
TypeKind::Float if self.results().inner_size() == 64 => "double".to_string(), TypeKind::Float if self.results().inner_size() == 32 => "float".to_string(), + TypeKind::Mask => format!("__mmask{}", self.results.bit_len.unwrap()), // TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(), // TypeKind::Int(true) if self.results().inner_size() == 64 => "long".to_string(), // TypeKind::Int(false) if self.results().inner_size() == 64 => "unsigned long".to_string(), From 10275aea3a164946c7465d3fc76d6856071dcb13 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Sun, 7 Sep 2025 23:45:43 +0530 Subject: [PATCH 169/358] feat: implemented get_lane_function for x86 --- .../crates/intrinsic-test/src/x86/config.rs | 40 +++++++++++++++++++ stdarch/crates/intrinsic-test/src/x86/mod.rs | 4 +- .../crates/intrinsic-test/src/x86/types.rs | 24 ++++++++++- 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index 427ec183a919f..cf831bddd022d 100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -14,6 +14,46 @@ pub const F16_FORMATTING_DEF: &str = r#" struct Hex(T); "#; +pub const LANE_FUNCTION_HELPERS: &str = r#" +int mm512_extract(__m512i m, int vec_len, int bit_len, int index) { + int lane_len = 128; + int max_major_index = vec_len / lane_len; + int max_minor_index = lane_len / bit_len; + + int major_index = index / max_major_index; + int minor_index = index % max_minor_index; + + __m128i lane = _mm512_extracti64x2_epi64(m, major_index); + + switch(bit_len){ + case 8: + return _mm_extract_epi8(lane, minor_index); + case 16: + return _mm_extract_epi16(lane, minor_index); + case 32: + return _mm_extract_epi32(lane, minor_index); + case 64: + return _mm_extract_epi64(lane, minor_index); + } +} + +int _mm512_extract_intrinsic_test_epi8(__m512i m, int lane) { + return mm512_extract(m, 512, 8, lane) +} + +int _mm512_extract_intrinsic_test_epi16(__m512i m, int lane) { + return mm512_extract(m, 512, 16, lane) +} + +int mm512_extract_intrinsic_test_epi16(__m512i m, int lane) { + return mm512_extract(m, 512, 16, lane) +} + +int mm512_extract_intrinsic_test_epi64(__m512i m, int lane) { + return mm512_extract(m, 512, 64, lane) +} +"#; + pub const X86_CONFIGURATIONS: &str = r#" #![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_bf16))] #![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))] diff --git a/stdarch/crates/intrinsic-test/src/x86/mod.rs b/stdarch/crates/intrinsic-test/src/x86/mod.rs index 5515e683854e3..514783a3e0de1 100644 --- a/stdarch/crates/intrinsic-test/src/x86/mod.rs +++ b/stdarch/crates/intrinsic-test/src/x86/mod.rs @@ -17,7 +17,7 @@ use crate::common::gen_rust::{ use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition}; use crate::common::intrinsic_helpers::TypeKind; use crate::common::{SupportedArchitectureTest, chunk_info}; -use crate::x86::config::{F16_FORMATTING_DEF, X86_CONFIGURATIONS}; +use crate::x86::config::{F16_FORMATTING_DEF, LANE_FUNCTION_HELPERS, X86_CONFIGURATIONS}; use config::build_notices; use intrinsic::X86IntrinsicType; use xml_parser::get_xml_intrinsics; @@ -137,7 +137,7 @@ impl SupportedArchitectureTest for X86ArchitectureTest { &mut main_rs, chunk_count, X86_CONFIGURATIONS, - "", + LANE_FUNCTION_HELPERS, self.intrinsics.iter().map(|i| i.name.as_str()), ) .unwrap(); diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs 
b/stdarch/crates/intrinsic-test/src/x86/types.rs index 4d5a0a5b7c673..6ca151308ea83 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -121,7 +121,29 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { /// Determines the get lane function for this type. fn get_lane_function(&self) -> String { - todo!("get_lane_function for X86IntrinsicType needs to be implemented!"); + let total_vector_bits: Option = self + .vec_len + .zip(self.bit_len) + .and_then(|(vec_len, bit_len)| Some(vec_len * bit_len)); + + match (self.bit_len, total_vector_bits) { + (Some(8), Some(128)) => String::from("_mm_extract_epi8"), + (Some(16), Some(128)) => String::from("_mm_extract_epi16"), + (Some(32), Some(128)) => String::from("_mm_extract_epi32"), + (Some(64), Some(128)) => String::from("_mm_extract_epi64"), + (Some(8), Some(256)) => String::from("_mm256_extract_epi8"), + (Some(16), Some(256)) => String::from("_mm256_extract_epi16"), + (Some(32), Some(256)) => String::from("_mm256_extract_epi32"), + (Some(64), Some(256)) => String::from("_mm256_extract_epi64"), + (Some(8), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi8"), + (Some(16), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi16"), + (Some(32), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi32"), + (Some(64), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi64"), + _ => unreachable!( + "invalid length for vector argument: {:?}, {:?}", + self.bit_len, self.vec_len + ), + } } } From 96e5470220de0cba4760f4a241715cbccd8aeef6 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Mon, 8 Sep 2025 00:47:07 +0530 Subject: [PATCH 170/358] chore: update c_prefix for mask and print_result_c for vector type --- stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs | 1 + stdarch/crates/intrinsic-test/src/x86/intrinsic.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs index 7a2a1ecdc9297..1351ca345bb86 100644 --- a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -75,6 +75,7 @@ impl TypeKind { Self::Float => "float", Self::Int(Sign::Signed) => "int", Self::Int(Sign::Unsigned) => "uint", + Self::Mask => "uint", Self::Poly => "poly", Self::Char(Sign::Signed) => "char", _ => unreachable!("Not used: {:#?}", self), diff --git a/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs b/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs index a4de1e3bdb39f..d3a01ec227c23 100644 --- a/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs +++ b/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs @@ -84,6 +84,7 @@ impl IntrinsicDefinition for Intrinsic { TypeKind::Float if self.results().inner_size() == 64 => "double".to_string(), TypeKind::Float if self.results().inner_size() == 32 => "float".to_string(), TypeKind::Mask => format!("__mmask{}", self.results.bit_len.unwrap()), + TypeKind::Vector => format!("__m{}i", self.results.bit_len.unwrap()), // TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(), // TypeKind::Int(true) if self.results().inner_size() == 64 => "long".to_string(), // TypeKind::Int(false) if self.results().inner_size() == 64 => "unsigned long".to_string(), From 78fb3b71839f54bb985dd5807a33fe632f1159fb Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Mon, 8 Sep 2025 20:02:03 +0530 Subject: [PATCH 
171/358] feat: handled extraction for 64-bit vector elements --- stdarch/crates/intrinsic-test/src/x86/config.rs | 5 +++++ stdarch/crates/intrinsic-test/src/x86/types.rs | 2 ++ 2 files changed, 7 insertions(+) diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index cf831bddd022d..bf139e5e53010 100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -52,6 +52,11 @@ int mm512_extract_intrinsic_test_epi16(__m512i m, int lane) { int mm512_extract_intrinsic_test_epi64(__m512i m, int lane) { return mm512_extract(m, 512, 64, lane) } + +int mm64_extract_intrinsic_test_epi32(__m64 m, int lane) { + int bit_shift_amount = lane * 32; + return _m_to_int(m >> bit_shift_amount); +} "#; pub const X86_CONFIGURATIONS: &str = r#" diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index 6ca151308ea83..2bb1ecb9f6f91 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -139,6 +139,8 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { (Some(16), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi16"), (Some(32), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi32"), (Some(64), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi64"), + (Some(16), Some(64)) => String::from("_mm_extract_pi16"), + (Some(32), Some(64)) => String::from("mm64_extract_intrinsic_test_epi32"), _ => unreachable!( "invalid length for vector argument: {:?}, {:?}", self.bit_len, self.vec_len From 5b7a45f58a2c482ca03c47ae111fb743dda16e1b Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Tue, 9 Sep 2025 00:54:50 +0530 Subject: [PATCH 172/358] feat: add 8x8 case for get_lane_function for 64-bit vector --- stdarch/crates/intrinsic-test/src/x86/config.rs | 7 +++++++ stdarch/crates/intrinsic-test/src/x86/types.rs | 1 + 2 files changed, 8 insertions(+) diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index bf139e5e53010..d0c5981122f13 100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -53,6 +53,13 @@ int mm512_extract_intrinsic_test_epi64(__m512i m, int lane) { return mm512_extract(m, 512, 64, lane) } +int mm64_extract_intrinsic_test_epi8(__m64 m, int lane) { + int real_lane_shift = lane / 2; + int real_bit_shift = (lane % 2) * 8; + int result = _mm_extract_pi16(m, lane / 2); + return (result >> real_bit_shift); +} + int mm64_extract_intrinsic_test_epi32(__m64 m, int lane) { int bit_shift_amount = lane * 32; return _m_to_int(m >> bit_shift_amount); diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index 2bb1ecb9f6f91..7e96657977b4a 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -139,6 +139,7 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { (Some(16), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi16"), (Some(32), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi32"), (Some(64), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi64"), + (Some(8), Some(64)) => String::from("mm64_extract_intrinsic_test_epi8"), (Some(16), Some(64)) => String::from("_mm_extract_pi16"), (Some(32), Some(64)) => String::from("mm64_extract_intrinsic_test_epi32"), _ => unreachable!( From 
867885e384bd1f1996e917eedc5944386c198ffe Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Wed, 10 Sep 2025 00:00:07 +0530 Subject: [PATCH 173/358] debug: printing self incase print_result_c fails. --- stdarch/crates/intrinsic-test/src/x86/intrinsic.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs b/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs index d3a01ec227c23..79f119b0e0896 100644 --- a/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs +++ b/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs @@ -83,8 +83,8 @@ impl IntrinsicDefinition for Intrinsic { TypeKind::Void => "void".to_string(), TypeKind::Float if self.results().inner_size() == 64 => "double".to_string(), TypeKind::Float if self.results().inner_size() == 32 => "float".to_string(), - TypeKind::Mask => format!("__mmask{}", self.results.bit_len.unwrap()), - TypeKind::Vector => format!("__m{}i", self.results.bit_len.unwrap()), + TypeKind::Mask => format!("__mmask{}", self.results.bit_len.expect(format!("self: {:#?}", self).as_str())), + TypeKind::Vector => format!("__m{}i", self.results.bit_len.expect(format!("self: {:#?}", self).as_str())), // TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(), // TypeKind::Int(true) if self.results().inner_size() == 64 => "long".to_string(), // TypeKind::Int(false) if self.results().inner_size() == 64 => "unsigned long".to_string(), From 3197bc596fe7f7457b1a6d2badaf628b4113cab6 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Wed, 10 Sep 2025 11:28:02 +0530 Subject: [PATCH 174/358] chore: update x86 module, removed intrinsicDefinition trait, formatting updates --- stdarch/Cargo.lock | 5 +- stdarch/crates/intrinsic-test/src/main.rs | 1 + .../crates/intrinsic-test/src/x86/config.rs | 13 +- .../intrinsic-test/src/x86/intrinsic.rs | 90 +--------- stdarch/crates/intrinsic-test/src/x86/mod.rs | 165 +++--------------- .../crates/intrinsic-test/src/x86/types.rs | 79 ++++++++- 6 files changed, 115 insertions(+), 238 deletions(-) diff --git a/stdarch/Cargo.lock b/stdarch/Cargo.lock index 26a422327187f..70f09adf2c857 100644 --- a/stdarch/Cargo.lock +++ b/stdarch/Cargo.lock @@ -998,17 +998,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -<<<<<<< HEAD name = "windows_x86_64_msvc" version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" -======= + +[[package]] name = "xml-rs" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fd8403733700263c6eb89f192880191f1b83e332f7a20371ddcf421c4a337c7" ->>>>>>> 3283a857 (feat: updated intrinsics creation) [[package]] name = "yaml-rust" diff --git a/stdarch/crates/intrinsic-test/src/main.rs b/stdarch/crates/intrinsic-test/src/main.rs index d780e35160364..ed3a50067dc4a 100644 --- a/stdarch/crates/intrinsic-test/src/main.rs +++ b/stdarch/crates/intrinsic-test/src/main.rs @@ -20,6 +20,7 @@ fn main() { | "armv7-unknown-linux-gnueabihf" | "aarch64_be-unknown-linux-gnu" => run(ArmArchitectureTest::create(processed_cli_options)), + "x86_64-unknown-linux-gnu" => run(X86ArchitectureTest::create(processed_cli_options)), _ => std::process::exit(0), } } diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index d0c5981122f13..f1e9e9932e7c2 
100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -1,12 +1,7 @@ -pub fn build_notices(line_prefix: &str) -> String { - format!( - "\ -{line_prefix}This is a transient test file, not intended for distribution. Some aspects of the -{line_prefix}test are derived from an XML specification, published under the same license as the -{line_prefix}`intrinsic-test` crate.\n -" - ) -} +pub const NOTICE: &str = "\ +// This is a transient test file, not intended for distribution. Some aspects of the +// test are derived from an XML specification, published under the same license as the +// `intrinsic-test` crate.\n"; // Format f16 values (and vectors containing them) in a way that is consistent with C. pub const F16_FORMATTING_DEF: &str = r#" diff --git a/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs b/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs index 79f119b0e0896..1417c51ea1ee8 100644 --- a/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs +++ b/stdarch/crates/intrinsic-test/src/x86/intrinsic.rs @@ -1,7 +1,4 @@ -use crate::common::argument::ArgumentList; -use crate::common::indentation::Indentation; -use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition}; -use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, TypeKind}; +use crate::common::intrinsic_helpers::IntrinsicType; use crate::x86::xml_parser::Parameter; use std::ops::{Deref, DerefMut}; @@ -24,88 +21,3 @@ impl DerefMut for X86IntrinsicType { &mut self.data } } - -impl IntrinsicDefinition for Intrinsic { - fn arguments(&self) -> ArgumentList { - self.arguments.clone() - } - - fn results(&self) -> X86IntrinsicType { - self.results.clone() - } - - fn name(&self) -> String { - self.name.clone() - } - - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. 
- fn print_result_c(&self, indentation: Indentation, additional: &str) -> String { - let lanes = if self.results().num_vectors() > 1 { - (0..self.results().num_vectors()) - .map(|vector| { - format!( - r#""{ty}(" << {lanes} << ")""#, - ty = self.results().c_single_vector_type(), - lanes = (0..self.results().num_lanes()) - .map(move |idx| -> std::string::String { - format!( - "{cast}{lane_fn}(__return_value.val[{vector}], {lane})", - cast = self.results().c_promotion(), - lane_fn = self.results().get_lane_function(), - lane = idx, - vector = vector, - ) - }) - .collect::>() - .join(r#" << ", " << "#) - ) - }) - .collect::>() - .join(r#" << ", " << "#) - } else if self.results().num_lanes() > 1 { - (0..self.results().num_lanes()) - .map(|idx| -> std::string::String { - format!( - "{cast}{lane_fn}(__return_value, {lane})", - cast = self.results().c_promotion(), - lane_fn = self.results().get_lane_function(), - lane = idx - ) - }) - .collect::>() - .join(r#" << ", " << "#) - } else { - format!( - "{promote}cast<{cast}>(__return_value)", - cast = match self.results.kind() { - TypeKind::Void => "void".to_string(), - TypeKind::Float if self.results().inner_size() == 64 => "double".to_string(), - TypeKind::Float if self.results().inner_size() == 32 => "float".to_string(), - TypeKind::Mask => format!("__mmask{}", self.results.bit_len.expect(format!("self: {:#?}", self).as_str())), - TypeKind::Vector => format!("__m{}i", self.results.bit_len.expect(format!("self: {:#?}", self).as_str())), - // TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(), - // TypeKind::Int(true) if self.results().inner_size() == 64 => "long".to_string(), - // TypeKind::Int(false) if self.results().inner_size() == 64 => "unsigned long".to_string(), - // TypeKind::Int(true) if self.results().inner_size() == 32 => "int".to_string(), - // TypeKind::Int(false) if self.results().inner_size() == 32 => "unsigned int".to_string(), - // TypeKind::Int(true) if self.results().inner_size() == 16 => "short".to_string(), - // TypeKind::Int(false) if self.results().inner_size() == 16 => "unsigned short".to_string(), - _ => self.results.c_scalar_type(), - }, - promote = self.results().c_promotion(), - ) - }; - - format!( - r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, - ty = if self.results().is_simd() { - format!("{}(", self.results().c_type()) - } else { - String::from("") - }, - close = if self.results.is_simd() { ")" } else { "" }, - ) - } -} diff --git a/stdarch/crates/intrinsic-test/src/x86/mod.rs b/stdarch/crates/intrinsic-test/src/x86/mod.rs index 514783a3e0de1..e73ceb5084f56 100644 --- a/stdarch/crates/intrinsic-test/src/x86/mod.rs +++ b/stdarch/crates/intrinsic-test/src/x86/mod.rs @@ -5,20 +5,12 @@ mod intrinsic; mod types; mod xml_parser; -use rayon::prelude::*; -use std::fs::{self, File}; - +use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; use crate::common::compare::compare_outputs; -use crate::common::gen_c::{write_main_cpp, write_mod_cpp}; -use crate::common::gen_rust::{ - compile_rust_programs, write_bin_cargo_toml, write_lib_cargo_toml, write_lib_rs, write_main_rs, -}; -use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition}; +use crate::common::compile_c::CppCompilation; +use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; -use crate::common::{SupportedArchitectureTest, chunk_info}; -use 
crate::x86::config::{F16_FORMATTING_DEF, LANE_FUNCTION_HELPERS, X86_CONFIGURATIONS}; -use config::build_notices; use intrinsic::X86IntrinsicType; use xml_parser::get_xml_intrinsics; @@ -28,7 +20,30 @@ pub struct X86ArchitectureTest { } impl SupportedArchitectureTest for X86ArchitectureTest { - fn create(cli_options: ProcessedCli) -> Box { + type IntrinsicImpl = X86IntrinsicType; + + fn cli_options(&self) -> &ProcessedCli { + &self.cli_options + } + + fn intrinsics(&self) -> &[Intrinsic] { + &self.intrinsics + } + + fn cpp_compilation(&self) -> Option { + compile::build_cpp_compilation(&self.cli_options) + } + + const NOTICE: &str = config::NOTICE; + + const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"]; + const PLATFORM_C_DEFINITIONS: &str = config::LANE_FUNCTION_HELPERS; + const PLATFORM_C_FORWARD_DECLARATIONS: &str = ""; + + const PLATFORM_RUST_DEFINITIONS: &str = config::F16_FORMATTING_DEF; + const PLATFORM_RUST_CFGS: &str = config::X86_CONFIGURATIONS; + + fn create(cli_options: ProcessedCli) -> Self { let intrinsics = get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file"); @@ -37,7 +52,7 @@ impl SupportedArchitectureTest for X86ArchitectureTest { // Not sure how we would compare intrinsic that returns void. .filter(|i| i.results.kind() != TypeKind::Void) .filter(|i| i.results.kind() != TypeKind::BFloat) - .filter(|i| i.arguments().args.len() > 0) + .filter(|i| i.arguments.args.len() > 0) .filter(|i| !i.arguments.iter().any(|a| a.ty.kind() == TypeKind::BFloat)) // Skip pointers for now, we would probably need to look at the return // type to work out how many elements we need to point to. @@ -47,132 +62,10 @@ impl SupportedArchitectureTest for X86ArchitectureTest { .collect::>(); intrinsics.sort_by(|a, b| a.name.cmp(&b.name)); - Box::new(Self { + Self { intrinsics: intrinsics, cli_options: cli_options, - }) - } - - fn build_c_file(&self) -> bool { - let c_target = "x86_64"; - let platform_headers = &["immintrin.h"]; - - let (chunk_size, chunk_count) = chunk_info(self.intrinsics.len()); - - let cpp_compiler_wrapped = compile::build_cpp_compilation(&self.cli_options); - - let notice = &build_notices("// "); - fs::create_dir_all("c_programs").unwrap(); - self.intrinsics - .par_chunks(chunk_size) - .enumerate() - .map(|(i, chunk)| { - let c_filename = format!("c_programs/mod_{i}.cpp"); - let mut file = File::create(&c_filename).unwrap(); - write_mod_cpp(&mut file, notice, c_target, platform_headers, chunk).unwrap(); - - // compile this cpp file into a .o file. 
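Reduced to a sketch, this is the shape the refactor moves to (trait and names here are trimmed and hypothetical; the real `SupportedArchitectureTest` carries more items): each architecture module now mostly declares data (headers, notices, cfgs) and a compiler hook, while a shared driver in `common` owns the build loop that the deleted `build_c_file`/`build_rust_file` bodies below used to duplicate per backend.

    // Minimal sketch, assuming the associated-items style shown in the diff:
    trait ArchTest {
        const PLATFORM_C_HEADERS: &'static [&'static str];
        fn intrinsic_names(&self) -> Vec<String>;
    }

    // A shared driver then needs only the trait, not the architecture:
    fn emit_includes<A: ArchTest>() -> String {
        A::PLATFORM_C_HEADERS
            .iter()
            .map(|h| format!("#include <{h}>\n"))
            .collect()
    }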
- // - // This is done because `cpp_compiler_wrapped` is None when - // the --generate-only flag is passed - if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { - let output = cpp_compiler - .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))?; - assert!(output.status.success(), "{output:?}"); - } - - Ok(()) - }) - .collect::>() - .unwrap(); - - let mut file = File::create("c_programs/main.cpp").unwrap(); - write_main_cpp( - &mut file, - c_target, - "\n", - self.intrinsics.iter().map(|i| i.name.as_str()), - ) - .unwrap(); - - // This is done because `cpp_compiler_wrapped` is None when - // the --generate-only flag is passed - if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { - // compile this cpp file into a .o file - info!("compiling main.cpp"); - let output = cpp_compiler - .compile_object_file("main.cpp", "intrinsic-test-programs.o") - .unwrap(); - assert!(output.status.success(), "{output:?}"); - - let object_files = (0..chunk_count) - .map(|i| format!("mod_{i}.o")) - .chain(["intrinsic-test-programs.o".to_owned()]); - - let output = cpp_compiler - .link_executable(object_files, "intrinsic-test-programs") - .unwrap(); - assert!(output.status.success(), "{output:?}"); } - - true - } - - fn build_rust_file(&self) -> bool { - std::fs::create_dir_all("rust_programs/src").unwrap(); - - let architecture = if self.cli_options.target.contains("v7") { - "arm" - } else { - "aarch64" - }; - - let (chunk_size, chunk_count) = chunk_info(self.intrinsics.len()); - - let mut cargo = File::create("rust_programs/Cargo.toml").unwrap(); - write_bin_cargo_toml(&mut cargo, chunk_count).unwrap(); - - let mut main_rs = File::create("rust_programs/src/main.rs").unwrap(); - write_main_rs( - &mut main_rs, - chunk_count, - X86_CONFIGURATIONS, - LANE_FUNCTION_HELPERS, - self.intrinsics.iter().map(|i| i.name.as_str()), - ) - .unwrap(); - - let target = &self.cli_options.target; - let toolchain = self.cli_options.toolchain.as_deref(); - let linker = self.cli_options.linker.as_deref(); - - let notice = &build_notices("// "); - self.intrinsics - .par_chunks(chunk_size) - .enumerate() - .map(|(i, chunk)| { - std::fs::create_dir_all(format!("rust_programs/mod_{i}/src"))?; - - let rust_filename = format!("rust_programs/mod_{i}/src/lib.rs"); - trace!("generating `{rust_filename}`"); - let mut file = File::create(rust_filename)?; - - let cfg = X86_CONFIGURATIONS; - let definitions = F16_FORMATTING_DEF; - write_lib_rs(&mut file, architecture, notice, cfg, definitions, chunk)?; - - let toml_filename = format!("rust_programs/mod_{i}/Cargo.toml"); - trace!("generating `{toml_filename}`"); - let mut file = File::create(toml_filename).unwrap(); - - write_lib_cargo_toml(&mut file, &format!("mod_{i}"))?; - - Ok(()) - }) - .collect::>() - .unwrap(); - - compile_rust_programs(toolchain, target, linker) } fn compare_outputs(&self) -> bool { diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index 7e96657977b4a..bb7ea59dbece4 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -5,6 +5,7 @@ use regex::Regex; use super::intrinsic::X86IntrinsicType; use crate::common::cli::Language; +use crate::common::indentation::Indentation; use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; use crate::x86::xml_parser::Parameter; @@ -116,7 +117,83 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { // then typecast it. 
format!("({type_value})") } - // Look for edge cases (constexpr, literal, etc) + } + + /// Generates a std::cout for the intrinsics results that will match the + /// rust debug output format for the return type. The generated line assumes + /// there is an int i in scope which is the current pass number. + fn print_result_c(&self, indentation: Indentation, additional: &str) -> String { + let lanes = if self.num_vectors() > 1 { + (0..self.num_vectors()) + .map(|vector| { + format!( + r#""{ty}(" << {lanes} << ")""#, + ty = self.c_single_vector_type(), + lanes = (0..self.num_lanes()) + .map(move |idx| -> std::string::String { + format!( + "{cast}{lane_fn}(__return_value.val[{vector}], {lane})", + cast = self.c_promotion(), + lane_fn = self.get_lane_function(), + lane = idx, + vector = vector, + ) + }) + .collect::>() + .join(r#" << ", " << "#) + ) + }) + .collect::>() + .join(r#" << ", " << "#) + } else if self.num_lanes() > 1 { + (0..self.num_lanes()) + .map(|idx| -> std::string::String { + format!( + "{cast}{lane_fn}(__return_value, {lane})", + cast = self.c_promotion(), + lane_fn = self.get_lane_function(), + lane = idx + ) + }) + .collect::>() + .join(r#" << ", " << "#) + } else { + format!( + "{promote}cast<{cast}>(__return_value)", + cast = match self.kind() { + TypeKind::Void => "void".to_string(), + TypeKind::Float if self.inner_size() == 64 => "double".to_string(), + TypeKind::Float if self.inner_size() == 32 => "float".to_string(), + TypeKind::Mask => format!( + "__mmask{}", + self.bit_len.expect(format!("self: {:#?}", self).as_str()) + ), + TypeKind::Vector => format!( + "__m{}i", + self.bit_len.expect(format!("self: {:#?}", self).as_str()) + ), + // TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(), + // TypeKind::Int(true) if self.results().inner_size() == 64 => "long".to_string(), + // TypeKind::Int(false) if self.results().inner_size() == 64 => "unsigned long".to_string(), + // TypeKind::Int(true) if self.results().inner_size() == 32 => "int".to_string(), + // TypeKind::Int(false) if self.results().inner_size() == 32 => "unsigned int".to_string(), + // TypeKind::Int(true) if self.results().inner_size() == 16 => "short".to_string(), + // TypeKind::Int(false) if self.results().inner_size() == 16 => "unsigned short".to_string(), + _ => self.c_scalar_type(), + }, + promote = self.c_promotion(), + ) + }; + + format!( + r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, + ty = if self.is_simd() { + format!("{}(", self.c_type()) + } else { + String::from("") + }, + close = if self.is_simd() { ")" } else { "" }, + ) } /// Determines the get lane function for this type. 
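For reference, a sketch of the string the multi-lane branch of `print_result_c` above assembles (the `(int)` promotion and lane function are illustrative stand-ins, not output from a real run):

    let (cast, lane_fn) = ("(int)", "_mm_extract_epi32");
    let lanes = (0..4)
        .map(|idx| format!("{cast}{lane_fn}(__return_value, {idx})"))
        .collect::<Vec<_>>()
        .join(r#" << ", " << "#);
    // The joined expression prints the lanes comma-separated from C++:
    assert!(lanes.starts_with(r#"(int)_mm_extract_epi32(__return_value, 0) << ", " <<"#));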
From e3b6e5a8a5e577ea8db55fd2bd57cb6c07e5c60e Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Sat, 13 Sep 2025 23:04:08 +0530 Subject: [PATCH 175/358] fixed errors that caused errors with cpp file generation (un-handled edge cases for Vector and Mask types) --- .../crates/intrinsic-test/src/common/cli.rs | 4 +- .../intrinsic-test/src/common/gen_rust.rs | 6 +-- .../src/common/intrinsic_helpers.rs | 44 +++++++++++++++---- .../crates/intrinsic-test/src/common/mod.rs | 9 ++++ stdarch/crates/intrinsic-test/src/x86/mod.rs | 19 -------- .../crates/intrinsic-test/src/x86/types.rs | 20 +++++++++ 6 files changed, 70 insertions(+), 32 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/common/cli.rs b/stdarch/crates/intrinsic-test/src/common/cli.rs index beae6a4b044da..f8125128ea840 100644 --- a/stdarch/crates/intrinsic-test/src/common/cli.rs +++ b/stdarch/crates/intrinsic-test/src/common/cli.rs @@ -44,7 +44,9 @@ pub struct Cli { pub generate_only: bool, /// Pass a target the test suite - #[arg(long, default_value_t = String::from("armv7-unknown-linux-gnueabihf"))] + /// x86_64-unknown-linux-gnu + /// armv7-unknown-linux-gnueabihf + #[arg(long, default_value_t = String::from("x86_64-unknown-linux-gnu"))] pub target: String, /// Set the linker diff --git a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs index d659cbc4aaa76..e97b745c59944 100644 --- a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs +++ b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs @@ -191,7 +191,7 @@ pub fn generate_rust_test_loop( w: &mut impl std::io::Write, intrinsic: &Intrinsic, indentation: Indentation, - specializations: &[Vec], + specializations: &[Vec], passes: u32, ) -> std::io::Result<()> { let intrinsic_name = &intrinsic.name; @@ -256,7 +256,7 @@ pub fn generate_rust_test_loop( /// Generate the specializations (unique sequences of const-generic arguments) for this intrinsic. fn generate_rust_specializations( constraints: &mut impl Iterator>, -) -> Vec> { +) -> Vec> { let mut specializations = vec![vec![]]; for constraint in constraints { @@ -264,7 +264,7 @@ fn generate_rust_specializations( .flat_map(|right| { specializations.iter().map(move |left| { let mut left = left.clone(); - left.push(u8::try_from(right).unwrap()); + left.push(i64::try_from(right).unwrap()); left }) }) diff --git a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs index 1351ca345bb86..7403b81df83fb 100644 --- a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -78,6 +78,7 @@ impl TypeKind { Self::Mask => "uint", Self::Poly => "poly", Self::Char(Sign::Signed) => "char", + Self::Vector => "int", _ => unreachable!("Not used: {:#?}", self), } } @@ -155,6 +156,7 @@ impl IntrinsicType { pub fn c_scalar_type(&self) -> String { match self.kind() { TypeKind::Char(_) => String::from("char"), + TypeKind::Vector => String::from("int32_t"), _ => format!( "{prefix}{bits}_t", prefix = self.kind().c_prefix(), @@ -163,14 +165,6 @@ impl IntrinsicType { } } - pub fn rust_scalar_type(&self) -> String { - format!( - "{prefix}{bits}", - prefix = self.kind().rust_prefix(), - bits = self.inner_size() - ) - } - pub fn c_promotion(&self) -> &str { match *self { IntrinsicType { @@ -285,6 +279,29 @@ impl IntrinsicType { ))) ) } + IntrinsicType { + kind: TypeKind::Vector, + bit_len: Some(bit_len @ (128 | 256 | 512)), + simd_len, + .. 
+ } => { + let (prefix, suffix) = match language { + Language::Rust => ("[", "]"), + Language::C => ("{", "}"), + }; + let body_indentation = indentation.nested(); + let effective_bit_len = 32; + let effective_vec_len = bit_len / effective_bit_len; + format!( + "{prefix}\n{body}\n{indentation}{suffix}", + body = (0..(simd_len.unwrap_or(1) * effective_vec_len + loads - 1)) + .format_with(",\n", |i, fmt| { + let src = value_for_array(effective_bit_len, i); + assert!(src == 0 || src.ilog2() < *bit_len); + fmt(&format_args!("{body_indentation}{src:#x}")) + }) + ) + } _ => unimplemented!("populate random: {:#?}", self), } } @@ -300,7 +317,7 @@ impl IntrinsicType { kind: TypeKind::Int(_) | TypeKind::Poly, .. } => true, - _ => unimplemented!(), + _ => true, } } @@ -332,4 +349,13 @@ pub trait IntrinsicTypeDefinition: Deref { /// rust debug output format for the return type. The generated line assumes /// there is an int i in scope which is the current pass number. fn print_result_c(&self, indentation: Indentation, additional: &str) -> String; + + /// To enable architecture-specific logic + fn rust_scalar_type(&self) -> String { + format!( + "{prefix}{bits}", + prefix = self.kind().rust_prefix(), + bits = self.inner_size() + ) + } } diff --git a/stdarch/crates/intrinsic-test/src/common/mod.rs b/stdarch/crates/intrinsic-test/src/common/mod.rs index 666b3885c147b..cb422c9cace6d 100644 --- a/stdarch/crates/intrinsic-test/src/common/mod.rs +++ b/stdarch/crates/intrinsic-test/src/common/mod.rs @@ -1,4 +1,5 @@ use std::fs::File; +use std::io::{self, Write}; use rayon::prelude::*; @@ -76,6 +77,14 @@ pub trait SupportedArchitectureTest { if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { let output = cpp_compiler .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))?; + if !output.status.success() { + io::stdout() + .write_all(&output.stdout) + .expect("Failed to write to stdout!"); + io::stderr() + .write_all(&output.stderr) + .expect("Failed to write to stderr!"); + } assert!(output.status.success(), "{output:?}"); } diff --git a/stdarch/crates/intrinsic-test/src/x86/mod.rs b/stdarch/crates/intrinsic-test/src/x86/mod.rs index e73ceb5084f56..13ae627e66397 100644 --- a/stdarch/crates/intrinsic-test/src/x86/mod.rs +++ b/stdarch/crates/intrinsic-test/src/x86/mod.rs @@ -7,7 +7,6 @@ mod xml_parser; use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; -use crate::common::compare::compare_outputs; use crate::common::compile_c::CppCompilation; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; @@ -67,22 +66,4 @@ impl SupportedArchitectureTest for X86ArchitectureTest { cli_options: cli_options, } } - - fn compare_outputs(&self) -> bool { - if self.cli_options.toolchain.is_some() { - let intrinsics_name_list = self - .intrinsics - .iter() - .map(|i| i.name.clone()) - .collect::>(); - - compare_outputs( - &intrinsics_name_list, - &self.cli_options.runner, - &self.cli_options.target, - ) - } else { - true - } - } } diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index bb7ea59dbece4..127dd38e6fe17 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -225,6 +225,20 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { ), } } + + fn rust_scalar_type(&self) -> String { + let re = Regex::new(r"\__m\d+[a-z]*").unwrap(); + if let Some(match_type) = re.find(self.param.type_data.as_str()) { + 
match_type.as_str().to_string() + } else { + let prefix = match self.data.kind { + TypeKind::Mask => String::from("__mmask"), + _ => self.kind().rust_prefix().to_string(), + }; + + format!("{prefix}{bits}", bits = self.inner_size()) + } + } } impl X86IntrinsicType { @@ -336,6 +350,12 @@ impl X86IntrinsicType { data.bit_len = Some(8); } + // default settings for "void *" parameters + // often used by intrinsics to denote memory address or so. + if data.kind == TypeKind::Mask && data.bit_len.is_none() { + data.bit_len = Some(32); + } + // if param.etype == IMM, then it is a constant. // else it stays unchanged. data.constant |= param.etype == "IMM"; From ed56f6299bac6f24fb00844966abf506a2b946a1 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Sun, 14 Sep 2025 21:50:57 +0530 Subject: [PATCH 176/358] feat: correcting errors with generated C artifacts --- stdarch/crates/intrinsic-test/src/arm/mod.rs | 12 +++- .../intrinsic-test/src/common/argument.rs | 2 +- .../crates/intrinsic-test/src/common/gen_c.rs | 16 ++--- .../crates/intrinsic-test/src/common/mod.rs | 1 + .../crates/intrinsic-test/src/x86/compile.rs | 16 +---- .../crates/intrinsic-test/src/x86/config.rs | 63 +++++++------------ stdarch/crates/intrinsic-test/src/x86/mod.rs | 12 +++- .../crates/intrinsic-test/src/x86/types.rs | 16 ++++- 8 files changed, 63 insertions(+), 75 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/arm/mod.rs b/stdarch/crates/intrinsic-test/src/arm/mod.rs index 08dc2d38702cd..a915d0d883ba8 100644 --- a/stdarch/crates/intrinsic-test/src/arm/mod.rs +++ b/stdarch/crates/intrinsic-test/src/arm/mod.rs @@ -31,7 +31,17 @@ impl SupportedArchitectureTest for ArmArchitectureTest { const NOTICE: &str = config::NOTICE; - const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; + const PLATFORM_C_HEADERS: &[&str] = &[ + "iostream", + "cstring", + "iomanip", + "sstream", + "cstddef", + "cstdint", + "arm_neon.h", + "arm_acle.h", + "arm_fp16.h", + ]; const PLATFORM_C_DEFINITIONS: &str = config::POLY128_OSTREAM_DEF; const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::POLY128_OSTREAM_DECL; diff --git a/stdarch/crates/intrinsic-test/src/common/argument.rs b/stdarch/crates/intrinsic-test/src/common/argument.rs index f38515e40a9d6..871e3d2243cfa 100644 --- a/stdarch/crates/intrinsic-test/src/common/argument.rs +++ b/stdarch/crates/intrinsic-test/src/common/argument.rs @@ -108,7 +108,7 @@ where for arg in self.iter().filter(|&arg| !arg.has_constraint()) { writeln!( w, - "{indentation}const {ty} {name}_vals[] = {values};", + "{indentation}alignas(64) const {ty} {name}_vals[] = {values};", ty = arg.ty.c_scalar_type(), name = arg.name, values = arg.ty.populate_random(indentation, loads, &Language::C) diff --git a/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/stdarch/crates/intrinsic-test/src/common/gen_c.rs index 28902b3dfe981..b7651dce59d1f 100644 --- a/stdarch/crates/intrinsic-test/src/common/gen_c.rs +++ b/stdarch/crates/intrinsic-test/src/common/gen_c.rs @@ -47,7 +47,7 @@ pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>( let ty = current.ty.c_type(); writeln!(w, "{indentation}{{")?; - writeln!(w, "{body_indentation}{ty} {} = {i};", current.name)?; + writeln!(w, "{body_indentation}const {ty} {} = {i};", current.name)?; generate_c_constraint_blocks( w, @@ -103,14 +103,11 @@ pub fn write_mod_cpp( writeln!(w, "#include <{header}>")?; } + writeln!(w, "{}", forward_declarations)?; + writeln!( w, r#" -#include -#include -#include -#include - template T1 cast(T2 x) {{ 
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same"); T1 ret{{}}; @@ -120,13 +117,9 @@ template T1 cast(T2 x) {{ std::ostream& operator<<(std::ostream& os, float16_t value); - - "# )?; - writeln!(w, "{}", forward_declarations)?; - for intrinsic in intrinsics { create_c_test_function(w, intrinsic)?; } @@ -137,12 +130,13 @@ std::ostream& operator<<(std::ostream& os, float16_t value); pub fn write_main_cpp<'a>( w: &mut impl std::io::Write, arch_specific_definitions: &str, + arch_specific_headers: &[&str], intrinsics: impl Iterator + Clone, ) -> std::io::Result<()> { writeln!(w, "#include ")?; writeln!(w, "#include ")?; - for header in ["arm_neon.h", "arm_acle.h", "arm_fp16.h"] { + for header in arch_specific_headers { writeln!(w, "#include <{header}>")?; } diff --git a/stdarch/crates/intrinsic-test/src/common/mod.rs b/stdarch/crates/intrinsic-test/src/common/mod.rs index cb422c9cace6d..5966bc2aecd09 100644 --- a/stdarch/crates/intrinsic-test/src/common/mod.rs +++ b/stdarch/crates/intrinsic-test/src/common/mod.rs @@ -97,6 +97,7 @@ pub trait SupportedArchitectureTest { write_main_cpp( &mut file, Self::PLATFORM_C_DEFINITIONS, + Self::PLATFORM_C_HEADERS, self.intrinsics().iter().map(|i| i.name.as_str()), ) .unwrap(); diff --git a/stdarch/crates/intrinsic-test/src/x86/compile.rs b/stdarch/crates/intrinsic-test/src/x86/compile.rs index 8baf5815966ef..3e08a491a0131 100644 --- a/stdarch/crates/intrinsic-test/src/x86/compile.rs +++ b/stdarch/crates/intrinsic-test/src/x86/compile.rs @@ -6,21 +6,7 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations let mut command = CompilationCommandBuilder::new() - .add_arch_flags([ - "avx", - "avx2", - "avx512f", - "avx512cd", - "avx512dq", - "avx512vl", - "avx512bw", - "avx512bf16", - "avx512bitalg", - "lzcnt", - "popcnt", - "adx", - "aes", - ]) + .add_arch_flags(["icelake-client"]) .set_compiler(cpp_compiler) .set_target(&config.target) .set_opt_level("2") diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index f1e9e9932e7c2..e43fd33093011 100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -10,62 +10,41 @@ struct Hex(T); "#; pub const LANE_FUNCTION_HELPERS: &str = r#" -int mm512_extract(__m512i m, int vec_len, int bit_len, int index) { - int lane_len = 128; - int max_major_index = vec_len / lane_len; - int max_minor_index = lane_len / bit_len; +typedef float float16_t; +typedef float float32_t; +typedef double float64_t; - int major_index = index / max_major_index; - int minor_index = index % max_minor_index; +#define __int64 long long - __m128i lane = _mm512_extracti64x2_epi64(m, major_index); +#define _mm512_extract_intrinsic_test_epi8(m, lane) \ + _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16) - switch(bit_len){ - case 8: - return _mm_extract_epi8(lane, minor_index); - case 16: - return _mm_extract_epi16(lane, minor_index); - case 32: - return _mm_extract_epi32(lane, minor_index); - case 64: - return _mm_extract_epi64(lane, minor_index); - } -} +#define _mm512_extract_intrinsic_test_epi16(m, lane) \ + _mm_extract_epi16(_mm512_extracti64x2_epi64((m), (lane) / 8), (lane) % 8) -int _mm512_extract_intrinsic_test_epi8(__m512i m, int lane) { - return mm512_extract(m, 512, 8, lane) -} +#define _mm512_extract_intrinsic_test_epi32(m, lane) \ + _mm_extract_epi32(_mm512_extracti64x2_epi64((m), 
(lane) / 4), (lane) % 4) -int _mm512_extract_intrinsic_test_epi16(__m512i m, int lane) { - return mm512_extract(m, 512, 16, lane) -} +#define _mm512_extract_intrinsic_test_epi64(m, lane) \ + _mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2) -int mm512_extract_intrinsic_test_epi16(__m512i m, int lane) { - return mm512_extract(m, 512, 16, lane) -} +#define _mm64_extract_intrinsic_test_epi8(m, lane) \ + ((_mm_extract_pi16((m), (lane) / 2) >> (((lane) % 2) * 8)) & 0xFF) -int mm512_extract_intrinsic_test_epi64(__m512i m, int lane) { - return mm512_extract(m, 512, 64, lane) -} - -int mm64_extract_intrinsic_test_epi8(__m64 m, int lane) { - int real_lane_shift = lane / 2; - int real_bit_shift = (lane % 2) * 8; - int result = _mm_extract_pi16(m, lane / 2); - return (result >> real_bit_shift); -} - -int mm64_extract_intrinsic_test_epi32(__m64 m, int lane) { - int bit_shift_amount = lane * 32; - return _m_to_int(m >> bit_shift_amount); -} +#define _mm64_extract_intrinsic_test_epi32(m, lane) \ + _mm_cvtsi64_si32(_mm_srli_si64(m, (lane) * 32)) "#; pub const X86_CONFIGURATIONS: &str = r#" +#![cfg_attr(target_arch = "x86", feature(avx))] +#![cfg_attr(target_arch = "x86", feature(sse))] +#![cfg_attr(target_arch = "x86", feature(sse2))] #![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_bf16))] #![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))] #![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))] #![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))] +#![cfg_attr(target_arch = "x86_64", feature(sse))] +#![cfg_attr(target_arch = "x86_64", feature(sse2))] #![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))] #![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))] #![feature(fmt_helpers_for_derive)] diff --git a/stdarch/crates/intrinsic-test/src/x86/mod.rs b/stdarch/crates/intrinsic-test/src/x86/mod.rs index 13ae627e66397..2ed329616963a 100644 --- a/stdarch/crates/intrinsic-test/src/x86/mod.rs +++ b/stdarch/crates/intrinsic-test/src/x86/mod.rs @@ -35,9 +35,17 @@ impl SupportedArchitectureTest for X86ArchitectureTest { const NOTICE: &str = config::NOTICE; - const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"]; + const PLATFORM_C_HEADERS: &[&str] = &[ + "immintrin.h", + "iostream", + "cstring", + "iomanip", + "sstream", + "cstddef", + "cstdint", + ]; const PLATFORM_C_DEFINITIONS: &str = config::LANE_FUNCTION_HELPERS; - const PLATFORM_C_FORWARD_DECLARATIONS: &str = ""; + const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::LANE_FUNCTION_HELPERS; const PLATFORM_RUST_DEFINITIONS: &str = config::F16_FORMATTING_DEF; const PLATFORM_RUST_CFGS: &str = config::X86_CONFIGURATIONS; diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index 127dd38e6fe17..a1dc5623ca1b2 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -110,7 +110,17 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { .filter(|c| c.is_numeric()) .join("") .replace("128", ""); - format!("_mm{type_val_filtered}_set1_epi64") + { + if type_value.ends_with("d") { + format!("_mm{type_val_filtered}_loadu_pd") + } else if type_value.ends_with("h") { + format!("_mm{type_val_filtered}_loadu_ph") + } else if type_value.ends_with("i") { + format!("_mm{type_val_filtered}_loadu_epi16") + } else { + format!("_mm{type_val_filtered}_loadu_ps") + } + } } else { // if it is a pointer, then rely on type conversion // If it is not any of the above type (__int, 
__bfloat16, unsigned short, etc) @@ -216,9 +226,9 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { (Some(16), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi16"), (Some(32), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi32"), (Some(64), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi64"), - (Some(8), Some(64)) => String::from("mm64_extract_intrinsic_test_epi8"), + (Some(8), Some(64)) => String::from("_mm64_extract_intrinsic_test_epi8"), (Some(16), Some(64)) => String::from("_mm_extract_pi16"), - (Some(32), Some(64)) => String::from("mm64_extract_intrinsic_test_epi32"), + (Some(32), Some(64)) => String::from("_mm64_extract_intrinsic_test_epi32"), _ => unreachable!( "invalid length for vector argument: {:?}, {:?}", self.bit_len, self.vec_len From 3dc9b85bb79998e53f33285a424ded7b2664bffe Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Mon, 15 Sep 2025 01:03:48 +0530 Subject: [PATCH 177/358] fix: vec_len -> simd_len (an error was present due to setting vec_len instead of simd_len for AVX register types) --- stdarch/crates/intrinsic-test/src/x86/config.rs | 1 + stdarch/crates/intrinsic-test/src/x86/types.rs | 12 ++++++------ stdarch/crates/intrinsic-test/src/x86/xml_parser.rs | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index e43fd33093011..58fabcbd0ebdf 100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -15,6 +15,7 @@ typedef float float32_t; typedef double float64_t; #define __int64 long long +#define __int32 int #define _mm512_extract_intrinsic_test_epi8(m, lane) \ _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16) diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index a1dc5623ca1b2..99c52551ad772 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -209,9 +209,9 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { /// Determines the get lane function for this type. fn get_lane_function(&self) -> String { let total_vector_bits: Option = self - .vec_len + .simd_len .zip(self.bit_len) - .and_then(|(vec_len, bit_len)| Some(vec_len * bit_len)); + .and_then(|(simd_len, bit_len)| Some(simd_len * bit_len)); match (self.bit_len, total_vector_bits) { (Some(8), Some(128)) => String::from("_mm_extract_epi8"), @@ -231,7 +231,7 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { (Some(32), Some(64)) => String::from("_mm64_extract_intrinsic_test_epi32"), _ => unreachable!( "invalid length for vector argument: {:?}, {:?}", - self.bit_len, self.vec_len + self.bit_len, self.simd_len ), } } @@ -345,9 +345,9 @@ impl X86IntrinsicType { if param.type_data.matches("__m").next().is_some() && param.type_data.matches("__mmask").next().is_none() { - data.vec_len = match str::parse::(type_processed.as_str()) { - // If bit_len is None, vec_len will be None. - // Else vec_len will be (num_bits / bit_len). + data.simd_len = match str::parse::(type_processed.as_str()) { + // If bit_len is None, simd_len will be None. + // Else simd_len will be (num_bits / bit_len). 
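    // Aside (inference from how the Arm backend uses these fields, not
    // stated in the patch): `simd_len` counts lanes inside one register,
    // while `vec_len` counts registers in a tuple-of-vectors type such as
    // Arm's int32x4x2_t. For "__m256i" with 32-bit elements the renamed
    // assignment yields simd_len = Some(256 / 32) = Some(8) and leaves
    // vec_len as None, so num_lanes()/num_vectors() behave as intended.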
Ok(num_bits) => data.bit_len.and_then(|bit_len| Some(num_bits / bit_len)), Err(_) => None, }; diff --git a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs index 7465cb72d5967..808f594a8c728 100644 --- a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs @@ -106,7 +106,7 @@ fn xml_to_intrinsic( let mut args_test = args.iter(); // if one of the args has etype="MASK" and type="__md", - // then set the bit_len and vec_len accordingly + // then set the bit_len and simd_len accordingly let re = Regex::new(r"__m\d+").unwrap(); let is_mask = |arg: &Argument| arg.ty.param.etype.as_str() == "MASK"; let is_vector = |arg: &Argument| re.is_match(arg.ty.param.type_data.as_str()); From be8f67998f1195cfad0672a17355edd9bfa97cdf Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Tue, 16 Sep 2025 17:08:35 +0530 Subject: [PATCH 178/358] chore: revert default target --- stdarch/crates/intrinsic-test/src/common/cli.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/common/cli.rs b/stdarch/crates/intrinsic-test/src/common/cli.rs index f8125128ea840..beae6a4b044da 100644 --- a/stdarch/crates/intrinsic-test/src/common/cli.rs +++ b/stdarch/crates/intrinsic-test/src/common/cli.rs @@ -44,9 +44,7 @@ pub struct Cli { pub generate_only: bool, /// Pass a target the test suite - /// x86_64-unknown-linux-gnu - /// armv7-unknown-linux-gnueabihf - #[arg(long, default_value_t = String::from("x86_64-unknown-linux-gnu"))] + #[arg(long, default_value_t = String::from("armv7-unknown-linux-gnueabihf"))] pub target: String, /// Set the linker From 747fcc9c3452e06aa36ca536ae77905cd6fd40f5 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Wed, 17 Sep 2025 23:58:20 +0530 Subject: [PATCH 179/358] chore: adding comments about memory alignment of variables and bash scripts that will be used in CI --- stdarch/crates/intrinsic-test/src/arm/mod.rs | 2 -- stdarch/crates/intrinsic-test/src/common/argument.rs | 2 ++ stdarch/crates/intrinsic-test/src/common/gen_rust.rs | 6 +++--- stdarch/crates/intrinsic-test/src/common/mod.rs | 9 --------- 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/arm/mod.rs b/stdarch/crates/intrinsic-test/src/arm/mod.rs index a915d0d883ba8..8f8289a7ecf40 100644 --- a/stdarch/crates/intrinsic-test/src/arm/mod.rs +++ b/stdarch/crates/intrinsic-test/src/arm/mod.rs @@ -36,8 +36,6 @@ impl SupportedArchitectureTest for ArmArchitectureTest { "cstring", "iomanip", "sstream", - "cstddef", - "cstdint", "arm_neon.h", "arm_acle.h", "arm_fp16.h", diff --git a/stdarch/crates/intrinsic-test/src/common/argument.rs b/stdarch/crates/intrinsic-test/src/common/argument.rs index 871e3d2243cfa..0ab01e4144c5e 100644 --- a/stdarch/crates/intrinsic-test/src/common/argument.rs +++ b/stdarch/crates/intrinsic-test/src/common/argument.rs @@ -106,6 +106,8 @@ where loads: u32, ) -> std::io::Result<()> { for arg in self.iter().filter(|&arg| !arg.has_constraint()) { + // Setting the variables on an aligned boundary to make it easier to pick + // functions (of a specific architecture) that would help load the values. 
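    // Sketch of an emitted line under this change (hypothetical names):
    //     alignas(64) const int32_t a_vals[] = { 0x12345678, /* ... */ };
    // 64 bytes spans a full 512-bit ZMM register, so aligned load variants
    // (e.g. _mm512_load_si512) become an option, even though the helpers
    // added earlier in this series use the unaligned _mm*_loadu_* forms.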
writeln!( w, "{indentation}alignas(64) const {ty} {name}_vals[] = {values};", diff --git a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs index e97b745c59944..3b330879e05be 100644 --- a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs +++ b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs @@ -191,7 +191,7 @@ pub fn generate_rust_test_loop( w: &mut impl std::io::Write, intrinsic: &Intrinsic, indentation: Indentation, - specializations: &[Vec], + specializations: &[Vec], passes: u32, ) -> std::io::Result<()> { let intrinsic_name = &intrinsic.name; @@ -256,7 +256,7 @@ pub fn generate_rust_test_loop( /// Generate the specializations (unique sequences of const-generic arguments) for this intrinsic. fn generate_rust_specializations( constraints: &mut impl Iterator>, -) -> Vec> { +) -> Vec> { let mut specializations = vec![vec![]]; for constraint in constraints { @@ -264,7 +264,7 @@ fn generate_rust_specializations( .flat_map(|right| { specializations.iter().map(move |left| { let mut left = left.clone(); - left.push(i64::try_from(right).unwrap()); + left.push(i32::try_from(right).unwrap()); left }) }) diff --git a/stdarch/crates/intrinsic-test/src/common/mod.rs b/stdarch/crates/intrinsic-test/src/common/mod.rs index 5966bc2aecd09..da9c75f5a00e3 100644 --- a/stdarch/crates/intrinsic-test/src/common/mod.rs +++ b/stdarch/crates/intrinsic-test/src/common/mod.rs @@ -1,5 +1,4 @@ use std::fs::File; -use std::io::{self, Write}; use rayon::prelude::*; @@ -77,14 +76,6 @@ pub trait SupportedArchitectureTest { if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { let output = cpp_compiler .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))?; - if !output.status.success() { - io::stdout() - .write_all(&output.stdout) - .expect("Failed to write to stdout!"); - io::stderr() - .write_all(&output.stderr) - .expect("Failed to write to stderr!"); - } assert!(output.status.success(), "{output:?}"); } From 0a543ef4960c97d7e66550e60f65876442641380 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Thu, 18 Sep 2025 01:39:51 +0530 Subject: [PATCH 180/358] chore: add compilation flags --- stdarch/crates/intrinsic-test/src/x86/compile.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/stdarch/crates/intrinsic-test/src/x86/compile.rs b/stdarch/crates/intrinsic-test/src/x86/compile.rs index 3e08a491a0131..9f3a76c4c1bc2 100644 --- a/stdarch/crates/intrinsic-test/src/x86/compile.rs +++ b/stdarch/crates/intrinsic-test/src/x86/compile.rs @@ -12,7 +12,14 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { .set_opt_level("2") .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref()) .set_project_root("c_programs") - .add_extra_flags(vec!["-ffp-contract=off", "-Wno-narrowing"]); + .add_extra_flags(vec![ + "-ffp-contract=off", + "-Wno-narrowing", + "-mavx", + "-mavx2", + "-mavx512f", + "-msse2", + ]); if !cpp_compiler.contains("clang") { command = command.add_extra_flag("-flax-vector-conversions"); From bae8b41e260913a6e0e6ccfebf40006be9ac50eb Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Thu, 18 Sep 2025 13:41:46 +0530 Subject: [PATCH 181/358] chore: add better error handling when writing and compiling mod_{i}.cpp, neatly organize C++ headers --- stdarch/crates/intrinsic-test/src/arm/mod.rs | 10 +--------- .../crates/intrinsic-test/src/common/gen_c.rs | 12 +++-------- .../crates/intrinsic-test/src/common/mod.rs | 20 ++++++++++++------- stdarch/crates/intrinsic-test/src/x86/mod.rs 
From bae8b41e260913a6e0e6ccfebf40006be9ac50eb Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Thu, 18 Sep 2025 13:41:46 +0530
Subject: [PATCH 181/358] chore: add better error handling when writing and
 compiling mod_{i}.cpp, neatly organize C++ headers

---
 stdarch/crates/intrinsic-test/src/arm/mod.rs  | 10 +---------
 .../crates/intrinsic-test/src/common/gen_c.rs | 12 +++--------
 .../crates/intrinsic-test/src/common/mod.rs   | 20 ++++++++++++-------
 stdarch/crates/intrinsic-test/src/x86/mod.rs  | 10 +---------
 4 files changed, 18 insertions(+), 34 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/arm/mod.rs b/stdarch/crates/intrinsic-test/src/arm/mod.rs
index 8f8289a7ecf40..08dc2d38702cd 100644
--- a/stdarch/crates/intrinsic-test/src/arm/mod.rs
+++ b/stdarch/crates/intrinsic-test/src/arm/mod.rs
@@ -31,15 +31,7 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
 
     const NOTICE: &str = config::NOTICE;
 
-    const PLATFORM_C_HEADERS: &[&str] = &[
-        "iostream",
-        "cstring",
-        "iomanip",
-        "sstream",
-        "arm_neon.h",
-        "arm_acle.h",
-        "arm_fp16.h",
-    ];
+    const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"];
 
     const PLATFORM_C_DEFINITIONS: &str = config::POLY128_OSTREAM_DEF;
     const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::POLY128_OSTREAM_DECL;
diff --git a/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/stdarch/crates/intrinsic-test/src/common/gen_c.rs
index b7651dce59d1f..25e4e210c397a 100644
--- a/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@@ -6,6 +6,7 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
 // The number of times each intrinsic will be called.
 const PASSES: u32 = 20;
+const COMMON_HEADERS: [&str; 5] = ["iostream", "string", "cstring", "iomanip", "sstream"];
 
 pub fn generate_c_test_loop(
     w: &mut impl std::io::Write,
@@ -99,7 +100,7 @@ pub fn write_mod_cpp(
 ) -> std::io::Result<()> {
     write!(w, "{notice}")?;
 
-    for header in platform_headers {
+    for header in COMMON_HEADERS.iter().chain(platform_headers.iter()) {
         writeln!(w, "#include <{header}>")?;
     }
 
@@ -133,20 +134,13 @@ pub fn write_main_cpp<'a>(
     arch_specific_headers: &[&str],
     intrinsics: impl Iterator<Item = &'a str> + Clone,
 ) -> std::io::Result<()> {
-    writeln!(w, "#include <iostream>")?;
-    writeln!(w, "#include <string>")?;
-
-    for header in arch_specific_headers {
+    for header in COMMON_HEADERS.iter().chain(arch_specific_headers.iter()) {
         writeln!(w, "#include <{header}>")?;
     }
 
     writeln!(
         w,
         r#"
-#include <cstring>
-#include <iomanip>
-#include <sstream>
-
 std::ostream& operator<<(std::ostream& os, float16_t value) {{
     uint16_t temp = 0;
     memcpy(&temp, &value, sizeof(float16_t));
diff --git a/stdarch/crates/intrinsic-test/src/common/mod.rs b/stdarch/crates/intrinsic-test/src/common/mod.rs
index da9c75f5a00e3..37a48654e4ca3 100644
--- a/stdarch/crates/intrinsic-test/src/common/mod.rs
+++ b/stdarch/crates/intrinsic-test/src/common/mod.rs
@@ -60,28 +60,34 @@ pub trait SupportedArchitectureTest {
             .map(|(i, chunk)| {
                 let c_filename = format!("c_programs/mod_{i}.cpp");
                 let mut file = File::create(&c_filename).unwrap();
-                write_mod_cpp(
+                let mod_file_write_result = write_mod_cpp(
                     &mut file,
                     Self::NOTICE,
                     Self::PLATFORM_C_HEADERS,
                     Self::PLATFORM_C_FORWARD_DECLARATIONS,
                     chunk,
-                )
-                .unwrap();
+                );
+
+                if let Err(error) = mod_file_write_result {
+                    return Err(format!("Error writing to mod_{i}.cpp: {error:?}"));
+                }
 
                 // compile this cpp file into a .o file.
                 //
                 // This is done because `cpp_compiler_wrapped` is None when
                 // the --generate-only flag is passed
                 if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
-                    let output = cpp_compiler
-                        .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))?;
-                    assert!(output.status.success(), "{output:?}");
+                    let compile_output = cpp_compiler
+                        .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"));
+
+                    if let Err(compile_error) = compile_output {
+                        return Err(format!("Error compiling mod_{i}.cpp: {compile_error:?}"));
+                    }
                 }
 
                 Ok(())
             })
-            .collect::<std::io::Result<()>>()
+            .collect::<Result<(), String>>()
             .unwrap();
 
         let mut file = File::create("c_programs/main.cpp").unwrap();
diff --git a/stdarch/crates/intrinsic-test/src/x86/mod.rs b/stdarch/crates/intrinsic-test/src/x86/mod.rs
index 2ed329616963a..e4c9742f8d2c4 100644
--- a/stdarch/crates/intrinsic-test/src/x86/mod.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/mod.rs
@@ -35,15 +35,7 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
 
     const NOTICE: &str = config::NOTICE;
 
-    const PLATFORM_C_HEADERS: &[&str] = &[
-        "immintrin.h",
-        "iostream",
-        "cstring",
-        "iomanip",
-        "sstream",
-        "cstddef",
-        "cstdint",
-    ];
+    const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h", "cstddef", "cstdint"];
 
     const PLATFORM_C_DEFINITIONS: &str = config::LANE_FUNCTION_HELPERS;
     const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::LANE_FUNCTION_HELPERS;
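Aside: the mod.rs change above switches the per-chunk closures from panicking to returning Result<(), String>, which the (possibly parallel) iterator can fold into a single Result via collect(). A tiny stand-alone illustration of that pattern (hypothetical error text, plain std iterator):

    fn main() {
        let results: Result<(), String> = (0..4)
            .map(|i| {
                if i % 2 == 0 {
                    Ok(())
                } else {
                    Err(format!("Error compiling mod_{i}.cpp: simulated"))
                }
            })
            .collect(); // short-circuits at the first Err
        assert!(results.is_err());
    }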
From ca0a48dae43e2809cbf4801e17de08e4f8d2b705 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Sat, 20 Sep 2025 15:14:43 +0530
Subject: [PATCH 182/358] feat: Fixed FP16 errors, made the loading function
 generation more accurate

---
 .../crates/intrinsic-test/src/arm/config.rs   | 10 ++++
 .../intrinsic-test/src/common/argument.rs     | 14 ++++--
 .../crates/intrinsic-test/src/common/gen_c.rs | 23 ++--------
 .../src/common/intrinsic_helpers.rs           |  5 +-
 .../intrinsic-test/src/common/values.rs       | 25 ++++++++++
 .../crates/intrinsic-test/src/x86/compile.rs  |  6 +++
 .../crates/intrinsic-test/src/x86/config.rs   | 13 +++++-
 .../crates/intrinsic-test/src/x86/types.rs    | 46 ++++++++++++++-----
 .../intrinsic-test/src/x86/xml_parser.rs      |  2 +
 9 files changed, 107 insertions(+), 37 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/arm/config.rs b/stdarch/crates/intrinsic-test/src/arm/config.rs
index d9024eabfaf46..46706e009eba1 100644
--- a/stdarch/crates/intrinsic-test/src/arm/config.rs
+++ b/stdarch/crates/intrinsic-test/src/arm/config.rs
@@ -6,6 +6,7 @@ pub const NOTICE: &str = "\
 pub const POLY128_OSTREAM_DECL: &str = r#"
 #ifdef __aarch64__
 std::ostream& operator<<(std::ostream& os, poly128_t value);
+std::ostream& operator<<(std::ostream& os, float16_t value);
 #endif
 "#;
 
@@ -23,6 +24,15 @@ std::ostream& operator<<(std::ostream& os, poly128_t value) {
     os << res;
     return os;
 }
+
+std::ostream& operator<<(std::ostream& os, float16_t value) {
+    uint16_t temp = 0;
+    memcpy(&temp, &value, sizeof(float16_t));
+    std::stringstream ss;
+    ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
+    os << ss.str();
+    return os;
+}
 #endif
 "#;

diff --git a/stdarch/crates/intrinsic-test/src/common/argument.rs b/stdarch/crates/intrinsic-test/src/common/argument.rs
index 0ab01e4144c5e..986c383ee106d 100644
--- a/stdarch/crates/intrinsic-test/src/common/argument.rs
+++ b/stdarch/crates/intrinsic-test/src/common/argument.rs
@@ -33,6 +33,10 @@ where
         self.ty.c_type()
     }
 
+    pub fn generate_name(&self) -> String {
+        format!("{}_val", self.name)
+    }
+
     pub fn is_simd(&self) -> bool {
         self.ty.is_simd()
     }
@@ -64,7 +68,7 @@ where
     }
 
     fn as_call_param_c(&self) -> String {
-        self.ty.as_call_param_c(&self.name)
+        self.ty.as_call_param_c(&self.generate_name())
     }
 }
@@ -91,7 +95,7 @@ where
     pub fn as_call_param_rust(&self) -> String {
         self.iter()
             .filter(|a| !a.has_constraint())
-            .map(|arg| arg.name.clone())
+            .map(|arg| arg.generate_name())
             .collect::<Vec<_>>()
             .join(", ")
     }
@@ -112,7 +116,7 @@ where
                 w,
                 "{indentation}alignas(64) const {ty} {name}_vals[] = {values};",
                 ty = arg.ty.c_scalar_type(),
-                name = arg.name,
+                name = arg.generate_name(),
                 values = arg.ty.populate_random(indentation, loads, &Language::C)
             )?
         }
@@ -155,7 +159,7 @@ where
                 format!(
                     "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i]));\n",
                     ty = arg.to_c_type(),
-                    name = arg.name,
+                    name = arg.generate_name(),
                     load = if arg.is_simd() {
                         arg.ty.get_load_function(Language::C)
                     } else {
@@ -175,7 +179,7 @@ where
             .map(|arg| {
                 format!(
                     "{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i));\n",
-                    name = arg.name,
+                    name = arg.generate_name(),
                     vals_name = arg.rust_vals_array_name(),
                     load = if arg.is_simd() {
                         arg.ty.get_load_function(Language::Rust)
diff --git a/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/stdarch/crates/intrinsic-test/src/common/gen_c.rs
index 25e4e210c397a..aeb94176f5320 100644
--- a/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@@ -48,7 +48,11 @@ pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>(
     let ty = current.ty.c_type();
 
     writeln!(w, "{indentation}{{")?;
-    writeln!(w, "{body_indentation}const {ty} {} = {i};", current.name)?;
+    writeln!(
+        w,
+        "{body_indentation}const {ty} {} = {i};",
+        current.generate_name()
+    )?;
 
     generate_c_constraint_blocks(
         w,
@@ -115,9 +119,6 @@ template <typename T1, typename T2> T1 cast(T2 x) {{
     memcpy(&ret, &x, sizeof(T1));
     return ret;
 }}
-
-std::ostream& operator<<(std::ostream& os, float16_t value);
-
 "#
     )?;
@@ -138,20 +139,6 @@ pub fn write_main_cpp<'a>(
         writeln!(w, "#include <{header}>")?;
     }
 
-    writeln!(
-        w,
-        r#"
-std::ostream& operator<<(std::ostream& os, float16_t value) {{
-    uint16_t temp = 0;
-    memcpy(&temp, &value, sizeof(float16_t));
-    std::stringstream ss;
-    ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
-    os << ss.str();
-    return os;
-}}
-"#
-    )?;
-
     // NOTE: It's assumed that this value contains the required `ifdef`s.
     writeln!(w, "{arch_specific_definitions}")?;
diff --git a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
index 7403b81df83fb..5d930eea2faac 100644
--- a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
+++ b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
@@ -1,3 +1,4 @@
+use std::cmp;
 use std::fmt;
 use std::ops::Deref;
 use std::str::FromStr;
@@ -131,7 +132,7 @@ impl IntrinsicType {
 
     pub fn inner_size(&self) -> u32 {
         if let Some(bl) = self.bit_len {
-            bl
+            cmp::max(bl, 8)
         } else {
             unreachable!("{:#?}", self)
         }
@@ -216,7 +217,7 @@ impl IntrinsicType {
     ) -> String {
         match self {
             IntrinsicType {
-                bit_len: Some(bit_len @ (8 | 16 | 32 | 64)),
+                bit_len: Some(bit_len @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 16 | 32 | 64)),
                 kind:
                     kind @ (TypeKind::Int(_) | TypeKind::Poly | TypeKind::Char(_) | TypeKind::Mask),
                 simd_len,
diff --git a/stdarch/crates/intrinsic-test/src/common/values.rs b/stdarch/crates/intrinsic-test/src/common/values.rs
index 1b614a742ef8b..6c94ef2c22e1d 100644
--- a/stdarch/crates/intrinsic-test/src/common/values.rs
+++ b/stdarch/crates/intrinsic-test/src/common/values.rs
@@ -4,6 +4,13 @@ pub fn value_for_array(bits: u32, index: u32) -> u64 {
     let index = index as usize;
 
     match bits {
+        1 => VALUES_8[index % 2].into(),
+        2 => VALUES_8[index % 4].into(),
+        3 => VALUES_8[index % 8].into(),
+        4 => VALUES_8[index % 16].into(),
+        5 => VALUES_5[index % VALUES_5.len()].into(),
+        6 => VALUES_6[index % VALUES_6.len()].into(),
+        7 => VALUES_7[index % VALUES_7.len()].into(),
         8 => VALUES_8[index % VALUES_8.len()].into(),
         16 => VALUES_16[index % VALUES_16.len()].into(),
         32 => VALUES_32[index % VALUES_32.len()].into(),
@@ -12,6 +19,24 @@ pub fn value_for_array(bits: u32, index: u32) -> u64 {
     }
 }
 
+pub const VALUES_5: &[u8] = &[
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e,
+    0x1f,
+];
+
+pub const VALUES_6: &[u8] = &[
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e,
+    0x3f,
+];
+
+pub const VALUES_7: &[u8] = &[
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e,
+    0x7f,
+];
+
 pub const VALUES_8: &[u8] = &[
     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     0xf0, 0x80, 0x3b, 0xff,
diff --git a/stdarch/crates/intrinsic-test/src/x86/compile.rs b/stdarch/crates/intrinsic-test/src/x86/compile.rs
index 9f3a76c4c1bc2..6eaab8615059e 100644
--- a/stdarch/crates/intrinsic-test/src/x86/compile.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/compile.rs
@@ -19,6 +19,12 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
             "-mavx2",
             "-mavx512f",
             "-msse2",
+            "-mavx512vl",
+            "-mavx512bw",
+            "-mavx512dq",
+            "-mavx512cd",
+            "-mavx512fp16",
+            "-ferror-limit=1000",
         ]);
 
     if !cpp_compiler.contains("clang") {
diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs
index 58fabcbd0ebdf..32a9b586c0be5 100644
--- a/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -10,13 +10,24 @@ struct Hex<T>(T);
 "#;
 
 pub const LANE_FUNCTION_HELPERS: &str = r#"
-typedef float float16_t;
+typedef _Float16 float16_t;
 typedef float float32_t;
 typedef double float64_t;
 
 #define __int64 long long
 #define __int32 int
 
+std::ostream& operator<<(std::ostream& os, _Float16 value);
+
+std::ostream& operator<<(std::ostream& os, _Float16 value) {
+    uint16_t temp = 0;
+    memcpy(&temp, &value, sizeof(_Float16));
+    std::stringstream ss;
+    ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
+    os << ss.str();
+    return os;
+}
+
 #define _mm512_extract_intrinsic_test_epi8(m, lane) \
     _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)
 
diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs
index 99c52551ad772..dfaf2adaf4c25 100644
--- a/stdarch/crates/intrinsic-test/src/x86/types.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/types.rs
@@ -13,7 +13,17 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
     /// Gets a string containing the type in C format.
     /// This function assumes that this value is present in the metadata hashmap.
     fn c_type(&self) -> String {
-        self.param.type_data.clone()
+        self.param
+            .type_data
+            .replace("unsigned __int64", "uint64_t")
+            .replace("unsigned __int32", "uint32_t")
+            .replace("unsigned __int16", "uint16_t")
+            .replace("unsigned __int8", "uint8_t")
+            .replace("__int64", "int64_t")
+            .replace("__int32", "int32_t")
+            .replace("__int16", "int16_t")
+            .replace("__int8", "int8_t")
+            .replace("const ", "")
     }
 
     fn c_single_vector_type(&self) -> String {
@@ -109,17 +119,22 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
             .chars()
             .filter(|c| c.is_numeric())
             .join("")
-            .replace("128", "");
+            .replace("128", "")
+            .replace("64", "");
         {
-            if type_value.ends_with("d") {
-                format!("_mm{type_val_filtered}_loadu_pd")
-            } else if type_value.ends_with("h") {
-                format!("_mm{type_val_filtered}_loadu_ph")
-            } else if type_value.ends_with("i") {
-                format!("_mm{type_val_filtered}_loadu_epi16")
-            } else {
-                format!("_mm{type_val_filtered}_loadu_ps")
-            }
+            let suffix = match (self.bit_len, self.kind) {
+                (Some(bit_len @ (8 | 16 | 32 | 64)), TypeKind::Int(_)) => {
+                    format!("epi{bit_len}")
+                }
+                (Some(16), TypeKind::Float) => format!("ph"),
+                (Some(32), TypeKind::Float) => format!("ps"),
+                (Some(64), TypeKind::Float) => format!("pd"),
+                (Some(128), TypeKind::Vector) => format!("si128"),
+                (Some(256), TypeKind::Vector) => format!("si256"),
+                (Some(512), TypeKind::Vector) => format!("si512"),
+                _ => unreachable!("Invalid element type for a vector type! {:?}", self.param),
+            };
+            format!("_mm{type_val_filtered}_loadu_{suffix}")
         }
     } else {
         // if it is a pointer, then rely on type conversion
@@ -366,6 +381,15 @@ impl X86IntrinsicType {
             data.bit_len = Some(32);
         }
 
+        // default settings for IMM parameters
+        if param.etype == "IMM" && param.imm_width > 0 {
+            data.bit_len = Some(param.imm_width);
+        }
+
+        if param.etype == "IMM" || param.imm_width > 0 || param.imm_type.len() > 0 {
+            data.constant = true;
+        }
+
         // if param.etype == IMM, then it is a constant.
         // else it stays unchanged.
         data.constant |= param.etype == "IMM";
diff --git a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
index 808f594a8c728..157a37fc9df4d 100644
--- a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
@@ -48,6 +48,8 @@ pub struct Parameter {
     pub etype: String,
     #[serde(rename = "@memwidth", default, deserialize_with = "string_to_u32")]
     pub memwidth: u32,
+    #[serde(rename = "@immwidth", default, deserialize_with = "string_to_u32")]
+    pub imm_width: u32,
     #[serde(rename = "@immtype", default)]
     pub imm_type: String,
 }

From fc39b5cedf8b89a7b75753a1e37de9871a2f5f08 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 24 Sep 2025 22:07:45 +0530
Subject: [PATCH 183/358] chore: Ensuring "const" appears for constant
 arguments to intrinsics.

Extra changes:
1. Using "as _" to allow for implicit typecasting

---
 .../intrinsic-test/src/common/argument.rs     | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/common/argument.rs b/stdarch/crates/intrinsic-test/src/common/argument.rs
index 986c383ee106d..5963abef2f952 100644
--- a/stdarch/crates/intrinsic-test/src/common/argument.rs
+++ b/stdarch/crates/intrinsic-test/src/common/argument.rs
@@ -30,7 +30,8 @@ where
     }
 
     pub fn to_c_type(&self) -> String {
-        self.ty.c_type()
+        let prefix = if self.ty.constant { "const " } else { "" };
+        format!("{}{}", prefix, self.ty.c_type())
     }
 
     pub fn generate_name(&self) -> String {
@@ -95,7 +96,7 @@ where
     pub fn as_call_param_rust(&self) -> String {
         self.iter()
             .filter(|a| !a.has_constraint())
-            .map(|arg| arg.generate_name())
+            .map(|arg| arg.generate_name() + " as _")
             .collect::<Vec<_>>()
             .join(", ")
     }
@@ -177,15 +178,16 @@ where
         self.iter()
             .filter(|&arg| !arg.has_constraint())
             .map(|arg| {
+                let load = if arg.is_simd() {
+                    arg.ty.get_load_function(Language::Rust)
+                } else {
+                    "*".to_string()
+                };
+                let typecast = if load.len() > 2 { "as _" } else { "" };
                 format!(
-                    "{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i));\n",
+                    "{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i){typecast});\n",
                     name = arg.generate_name(),
                     vals_name = arg.rust_vals_array_name(),
-                    load = if arg.is_simd() {
-                        arg.ty.get_load_function(Language::Rust)
-                    } else {
-                        "*".to_string()
-                    },
                 )
             })
             .collect()
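Aside: the "as _" suffix added above leans on Rust inferring the cast target from the called intrinsic's parameter type, so one generated form serves several integer widths. A small stand-alone illustration of the idiom (hypothetical function, not from the crate):

    fn takes_u32(x: u32) -> u32 {
        x
    }

    fn main() {
        let v: i64 = 7;
        // `as _` infers the target type u32 from takes_u32's signature.
        assert_eq!(takes_u32(v as _), 7);
    }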
From 93fad84f739248dffa86ea6df4d7b2878d25610d Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 24 Sep 2025 22:10:34 +0530
Subject: [PATCH 184/358] chore: allow the cast() function to perform implicit
 type conversion for certain cases (like uint32_t to uint64_t)

extras:
1. added more C++ headers
2. typecasting integer constants (for example, the MM_FROUND arguments) for
   type compatibility

---
 .../crates/intrinsic-test/src/common/gen_c.rs | 30 +++++++++++++++----
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/stdarch/crates/intrinsic-test/src/common/gen_c.rs
index aeb94176f5320..965e229da509f 100644
--- a/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@@ -6,7 +6,15 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
 // The number of times each intrinsic will be called.
 const PASSES: u32 = 20;
-const COMMON_HEADERS: [&str; 5] = ["iostream", "string", "cstring", "iomanip", "sstream"];
+const COMMON_HEADERS: [&str; 7] = [
+    "iostream",
+    "string",
+    "cstring",
+    "iomanip",
+    "sstream",
+    "type_traits",
+    "cassert",
+];
 
 pub fn generate_c_test_loop(
     w: &mut impl std::io::Write,
@@ -48,9 +56,13 @@ pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>(
     let ty = current.ty.c_type();
 
     writeln!(w, "{indentation}{{")?;
+
+    // TODO: Move to actually specifying the enum value
+    // instead of typecasting integers, for better clarity
+    // of generated code.
     writeln!(
         w,
-        "{body_indentation}const {ty} {} = {i};",
+        "{body_indentation}const {ty} {} = ({ty}){i};",
         current.generate_name()
     )?;
@@ -113,11 +125,17 @@ pub fn write_mod_cpp(
     writeln!(
         w,
         r#"
+// T1 is the `To` type, T2 is the `From` type
 template <typename T1, typename T2> T1 cast(T2 x) {{
-    static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
-    T1 ret{{}};
-    memcpy(&ret, &x, sizeof(T1));
-    return ret;
+    if (std::is_convertible<T2, T1>::value) {{
+        return x;
+    }} else if (sizeof(T1) == sizeof(T2)) {{
+        T1 ret{{}};
+        memcpy(&ret, &x, sizeof(T1));
+        return ret;
+    }} else {{
+        assert("T2 must either be convertible to T1, or have the same size as T1!");
+    }}
 }}
 "#
     )?;

From d0644456bb1cac7bd833b478e2cfd1433d114d20 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 24 Sep 2025 22:13:10 +0530
Subject: [PATCH 185/358] feat: matching the expected number of elements for
 array to load arguments, accommodating for signed variables too

---
 .../src/common/intrinsic_helpers.rs           | 22 +++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
index 5d930eea2faac..43a0e3f5d1633 100644
--- a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
+++ b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
@@ -284,6 +284,7 @@ impl IntrinsicType {
             IntrinsicType {
                 kind: TypeKind::Vector,
                 bit_len: Some(bit_len @ (128 | 256 | 512)),
                 simd_len,
+                vec_len,
                 ..
             } => {
                 let (prefix, suffix) = match language {
                 };
                 let body_indentation = indentation.nested();
                 let effective_bit_len = 32;
-                let effective_vec_len = bit_len / effective_bit_len;
                 format!(
                     "{prefix}\n{body}\n{indentation}{suffix}",
-                    body = (0..(simd_len.unwrap_or(1) * effective_vec_len + loads - 1))
+                    body = (0..(vec_len.unwrap_or(1) * simd_len.unwrap_or(1) + loads - 1))
                         .format_with(",\n", |i, fmt| {
                             let src = value_for_array(effective_bit_len, i);
-                            assert!(src == 0 || src.ilog2() < *bit_len);
-                            fmt(&format_args!("{body_indentation}{src:#x}"))
+                            assert!(src == 0 || src.ilog2() < effective_bit_len);
+                            if (src >> (effective_bit_len - 1)) != 0 {
+                                // `src` is a two's complement representation of a negative value.
+                                let mask = !0u64 >> (64 - effective_bit_len);
+                                let ones_compl = src ^ mask;
+                                let twos_compl = ones_compl + 1;
+                                if (twos_compl == src) && (language == &Language::C) {
+                                    // `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid
+                                    // undefined literal overflow behaviour.
+                                    fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1"))
+                                } else {
+                                    fmt(&format_args!("{body_indentation}-{twos_compl:#x}"))
+                                }
+                            } else {
+                                fmt(&format_args!("{body_indentation}{src:#x}"))
+                            }
                         })
                 )
             }
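Aside: a worked check of the negative-literal handling added above, for the 32-bit case:

    fn main() {
        let src: u64 = 0x8000_0000; // two's-complement bit pattern of INT32_MIN
        let effective_bit_len = 32;
        let mask = !0u64 >> (64 - effective_bit_len);
        let ones_compl = src ^ mask;
        let twos_compl = ones_compl + 1;
        // INT32_MIN negates to itself, so the generator emits "-0x7fffffff - 1"
        // instead of a C integer literal that would overflow.
        assert_eq!(ones_compl, 0x7fff_ffff);
        assert_eq!(twos_compl, src);
    }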
From c35122e1bfabbfce5dad1ad2d705d7fdd97640ad Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 24 Sep 2025 22:14:20 +0530
Subject: [PATCH 186/358] feat: updated with debug printing and ostream
 implementation for vector types

---
 .../crates/intrinsic-test/src/x86/config.rs   | 100 +++++++++++++++++-
 1 file changed, 98 insertions(+), 2 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs
index 32a9b586c0be5..a199a58ff2706 100644
--- a/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -5,8 +5,64 @@ pub const NOTICE: &str = "\
 
 // Format f16 values (and vectors containing them) in a way that is consistent with C.
 pub const F16_FORMATTING_DEF: &str = r#"
+use std::arch::x86_64::*;
+
+#[inline]
+fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
+    formatter: &mut core::fmt::Formatter<'_>,
+    type_name: &str,
+    array: &[T; N],
+) -> core::fmt::Result {
+    core::fmt::Formatter::debug_tuple_fields_finish(
+        formatter,
+        type_name,
+        &core::array::from_fn::<&dyn core::fmt::Debug, N, _>(|i| &array[i]),
+    )
+}
+
 #[repr(transparent)]
 struct Hex<T>(T);
+
+impl<T: DebugHexF16> core::fmt::Debug for Hex<T> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        <T as DebugHexF16>::fmt(&self.0, f)
+    }
+}
+
+fn debug_f16<T: DebugHexF16>(x: T) -> impl core::fmt::Debug {
+    Hex(x)
+}
+
+trait DebugHexF16 {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
+}
+
+impl DebugHexF16 for f16 {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "{:#06x?}", self.to_bits())
+    }
+}
+
+impl DebugHexF16 for __m128h {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
+        debug_simd_finish(f, "__m128h", &array)
+    }
+}
+
+impl DebugHexF16 for __m256h {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
+        debug_simd_finish(f, "__m256h", &array)
+    }
+}
+
+impl DebugHexF16 for __m512h {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
+        debug_simd_finish(f, "__m512h", &array)
+    }
+}
 "#;
 
 pub const LANE_FUNCTION_HELPERS: &str = r#"
@@ -18,6 +74,9 @@ typedef double float64_t;
 #define __int32 int
 
 std::ostream& operator<<(std::ostream& os, _Float16 value);
+std::ostream& operator<<(std::ostream& os, __m128i value);
+std::ostream& operator<<(std::ostream& os, __m256i value);
+std::ostream& operator<<(std::ostream& os, __m512i value);
 
 std::ostream& operator<<(std::ostream& os, _Float16 value) {
     uint16_t temp = 0;
@@ -28,6 +87,45 @@ std::ostream& operator<<(std::ostream& os, _Float16 value) {
     return os;
 }
 
+std::ostream& operator<<(std::ostream& os, __m128i value) {
+    void* temp = malloc(sizeof(__m128i));
+    _mm_storeu_si128((__m128i*)temp, value);
+    std::stringstream ss;
+
+    ss << "0x";
+    for(int i = 0; i < 16; i++) {
+        ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i];
+    }
+    os << ss.str();
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, __m256i value) {
+    void* temp = malloc(sizeof(__m256i));
+    _mm256_storeu_si256((__m256i*)temp, value);
+    std::stringstream ss;
+
+    ss << "0x";
+    for(int i = 0; i < 32; i++) {
+        ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i];
+    }
+    os << ss.str();
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, __m512i value) {
+    void* temp = malloc(sizeof(__m512i));
+    _mm512_storeu_si512((__m512i*)temp, value);
+    std::stringstream ss;
+
+    ss << "0x";
+    for(int i = 0; i < 64; i++) {
+        ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i];
+    }
+    os << ss.str();
+    return os;
+}
+
 #define _mm512_extract_intrinsic_test_epi8(m, lane) \
     _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)
@@ -55,8 +153,6 @@ pub const X86_CONFIGURATIONS: &str = r#"
 #![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))]
 #![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
 #![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
-#![cfg_attr(target_arch = "x86_64", feature(sse))]
-#![cfg_attr(target_arch = "x86_64", feature(sse2))]
 #![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))]
 #![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))]
 #![feature(fmt_helpers_for_derive)]

From e9eff4f3eabaca268f78739389da43a0d26aaa7e Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 24 Sep 2025 22:15:39 +0530
Subject: [PATCH 187/358] chore: corrected the legal range of values for
 constrained arguments such as _MM_FROUND_SAE and _MM_ROUND_MODE

---
 .../intrinsic-test/src/x86/constraint.rs      | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/x86/constraint.rs b/stdarch/crates/intrinsic-test/src/x86/constraint.rs
index 1f0698838885e..72f5da3b3faf3 100644
--- a/stdarch/crates/intrinsic-test/src/x86/constraint.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/constraint.rs
@@ -1,19 +1,29 @@
 use crate::common::constraint::Constraint;
 
-pub fn map_constraints(imm_type: &String) -> Option<Constraint> {
+pub fn map_constraints(imm_type: &String, imm_width: u32) -> Option<Constraint> {
+    if imm_width > 0 {
+        let max: i64 = 2i64.pow(imm_width);
+        return Some(Constraint::Range(0..max));
+    }
     match imm_type.as_str() {
-        "_MM_FROUND" => Some(Constraint::Range(0..4)),
+        // Legal values for variables of `_MM_FROUND` type are:
+        // 8  => (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+        // 9  => (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)     // round down, and suppress exceptions
+        // 10 => (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)     // round up, and suppress exceptions
+        // 11 => (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)        // truncate, and suppress exceptions
+        // 4  => _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+        "_MM_FROUND" => Some(Constraint::Set(vec![4, 8, 9, 10, 11])),
         "_MM_INDEX_SCALE" => Some(Constraint::Set(vec![1, 2, 4, 8])),
         "_MM_CMPINT" => Some(Constraint::Range(0..8)),
         "_MM_REDUCE" => Some(Constraint::Range(0..8)),
-        "_MM_FROUND_SAE" => Some(Constraint::Range(0..8)),
+        "_MM_FROUND_SAE" => Some(Constraint::Equal(8)),
         "_MM_MANTISSA_NORM" => Some(Constraint::Range(0..4)),
         "_MM_MANTISSA_NORM_ENUM" => Some(Constraint::Range(0..4)),
         "_MM_MANTISSA_SIGN" => Some(Constraint::Range(0..3)),
         "_MM_PERM" => Some(Constraint::Range(0..256)),
         "_MM_PERM_ENUM" => Some(Constraint::Range(0..256)),
         "_MM_CMPINT_ENUM" => Some(Constraint::Range(0..8)),
-        "_MM_ROUND_MODE" => Some(Constraint::Set(vec![0, 0x2000, 0x4000, 0x6000])),
+        "_MM_ROUND_MODE" => Some(Constraint::Set(vec![0, 0x2, 0x4, 0x6])),
         "_CMP_" => Some(Constraint::Range(0..32)),
         _ => None,
     }
 }
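Aside: a spot-check of the tightened _MM_FROUND mapping above, written as a hypothetical test (assuming Constraint derives PartialEq and Debug; not part of the patch):

    #[test]
    fn mm_fround_legal_values() {
        assert_eq!(
            map_constraints(&String::from("_MM_FROUND"), 0),
            Some(Constraint::Set(vec![4, 8, 9, 10, 11]))
        );
        // An explicit immwidth takes precedence over the named type:
        // imm_width = 2 yields the range 0..2^2.
        assert_eq!(
            map_constraints(&String::from("_MM_FROUND"), 2),
            Some(Constraint::Range(0..4))
        );
    }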
From 5bf09d8d89a7c5fa7afe47baad14200a00394695 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 24 Sep 2025 22:16:23 +0530
Subject: [PATCH 188/358] feat: filter for duplicates in the definition of
 intrinsics

---
 stdarch/crates/intrinsic-test/src/x86/mod.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/stdarch/crates/intrinsic-test/src/x86/mod.rs b/stdarch/crates/intrinsic-test/src/x86/mod.rs
index e4c9742f8d2c4..d5ebd960b30c3 100644
--- a/stdarch/crates/intrinsic-test/src/x86/mod.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/mod.rs
@@ -11,6 +11,7 @@ use crate::common::compile_c::CppCompilation;
 use crate::common::intrinsic::Intrinsic;
 use crate::common::intrinsic_helpers::TypeKind;
 use intrinsic::X86IntrinsicType;
+use itertools::Itertools;
 use xml_parser::get_xml_intrinsics;
 
 pub struct X86ArchitectureTest {
@@ -58,6 +59,7 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
         .filter(|i| !i.arguments.iter().any(|a| a.is_ptr()))
         .filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128))
         .filter(|i| !cli_options.skip.contains(&i.name))
+        .unique_by(|i| i.name.clone())
         .collect::<Vec<_>>();
 
     intrinsics.sort_by(|a, b| a.name.cmp(&b.name));

From dbc2f0f50fb3bf7e2b69a6c516f2baa9ee0fec5e Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 24 Sep 2025 22:23:30 +0530
Subject: [PATCH 189/358] chore: vector types cannot be the type of an
 individual element in an array.

Extra:
1. Added better load functions
2. Added an update_simd_len() function to support cases where the bit_len of
   the element needs to be inferred from its partner arguments before
   calculating the simd_len

---
 .../crates/intrinsic-test/src/x86/types.rs    | 113 ++++++++++++------
 1 file changed, 77 insertions(+), 36 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs
index dfaf2adaf4c25..b07726656ade2 100644
--- a/stdarch/crates/intrinsic-test/src/x86/types.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/types.rs
@@ -115,6 +115,10 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
         // if "type" starts with __m{h/i/},
         // then use either _mm_set1_epi64,
         // _mm256_set1_epi64 or _mm512_set1_epi64
+        if type_value.contains("__m64") {
+            return String::from("*(__m64*)");
+        }
+
         let type_val_filtered = type_value
             .chars()
             .filter(|c| c.is_numeric())
             .join("")
             .replace("128", "")
             .replace("64", "");
         {
             let suffix = match (self.bit_len, self.kind) {
                 (Some(bit_len @ (8 | 16 | 32 | 64)), TypeKind::Int(_)) => {
                     format!("epi{bit_len}")
                 }
+                (Some(bit_len), TypeKind::Mask) => format!("epi{bit_len}"),
                 (Some(16), TypeKind::Float) => format!("ph"),
                 (Some(32), TypeKind::Float) => format!("ps"),
                 (Some(64), TypeKind::Float) => format!("pd"),
-                (Some(128), TypeKind::Vector) => format!("si128"),
-                (Some(256), TypeKind::Vector) => format!("si256"),
-                (Some(512), TypeKind::Vector) => format!("si512"),
+                (Some(128 | 256 | 512), TypeKind::Vector) => format!("epi32"),
{:?}", self.param), }; format!("_mm{type_val_filtered}_loadu_{suffix}") @@ -252,17 +255,18 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { } fn rust_scalar_type(&self) -> String { - let re = Regex::new(r"\__m\d+[a-z]*").unwrap(); - if let Some(match_type) = re.find(self.param.type_data.as_str()) { - match_type.as_str().to_string() - } else { - let prefix = match self.data.kind { - TypeKind::Mask => String::from("__mmask"), - _ => self.kind().rust_prefix().to_string(), - }; + let prefix = match self.data.kind { + TypeKind::Mask => String::from("__mmask"), + TypeKind::Vector => String::from("i"), + _ => self.kind().rust_prefix().to_string(), + }; - format!("{prefix}{bits}", bits = self.inner_size()) - } + let bits = if self.inner_size() >= 128 { + 32 + } else { + self.inner_size() + }; + format!("{prefix}{bits}") } } @@ -311,6 +315,26 @@ impl X86IntrinsicType { }) } + pub fn update_simd_len(&mut self) { + let mut type_processed = self.param.type_data.clone(); + type_processed.retain(|c| c.is_numeric()); + + // check the param.type and extract numeric part if there are double + // underscores. divide this number with bit-len and set this as simd-len. + // Only __m types can have a simd-len. + if self.param.type_data.contains("__m") && !self.param.type_data.contains("__mmask") { + self.data.simd_len = match str::parse::(type_processed.as_str()) { + // If bit_len is None, simd_len will be None. + // Else simd_len will be (num_bits / bit_len). + Ok(num_bits) => self + .data + .bit_len + .and_then(|bit_len| Some(num_bits / bit_len)), + Err(_) => None, + }; + } + } + pub fn from_param(param: &Parameter) -> Result { match Self::from_c(param.type_data.as_str()) { Err(message) => Err(message), @@ -350,22 +374,26 @@ impl X86IntrinsicType { } } - if param.type_data.matches("__mmask").next().is_some() { + if param.type_data.contains("__mmask") { data.bit_len = str::parse::(type_processed.as_str()).ok(); } - // then check the param.type and extract numeric part if there are double - // underscores. divide this number with bit-len and set this as simd-len. - // Only __m types can have a simd-len. - if param.type_data.matches("__m").next().is_some() - && param.type_data.matches("__mmask").next().is_none() - { - data.simd_len = match str::parse::(type_processed.as_str()) { - // If bit_len is None, simd_len will be None. - // Else simd_len will be (num_bits / bit_len). - Ok(num_bits) => data.bit_len.and_then(|bit_len| Some(num_bits / bit_len)), - Err(_) => None, - }; + if vec!["M512", "M256", "M128"].contains(¶m.etype.as_str()) { + match param.type_data.chars().last() { + Some('i') => { + data.kind = TypeKind::Int(Sign::Signed); + data.bit_len = Some(32); + } + Some('h') => { + data.kind = TypeKind::Float; + data.bit_len = Some(16); + } + Some('d') => { + data.kind = TypeKind::Float; + data.bit_len = Some(64); + } + _ => (), + } } // default settings for "void *" parameters @@ -381,22 +409,35 @@ impl X86IntrinsicType { data.bit_len = Some(32); } - // default settings for IMM parameters - if param.etype == "IMM" && param.imm_width > 0 { - data.bit_len = Some(param.imm_width); - } - if param.etype == "IMM" || param.imm_width > 0 || param.imm_type.len() > 0 { + data.kind = TypeKind::Int(Sign::Unsigned); data.constant = true; } - // if param.etype == IMM, then it is a constant. - // else it stays unchanged. - data.constant |= param.etype == "IMM"; - Ok(X86IntrinsicType { + // Rust defaults to signed variants, unless they are explicitly mentioned + // the `type` field are C++ types. 
From c7220b334cb550e62760b65e569a06f139806a65 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 24 Sep 2025 22:25:58 +0530
Subject: [PATCH 190/358] chore: accommodate the `immwidth` field for
 constraints

extras:
1. call update_simd_len() after inferring bit_len for arguments of certain
   intrinsics
2. handle the effective bit_len for the _mm_mpsadbw_epu8 intrinsic's `imm8`
   argument, which has only 3 bits that are used

---
 stdarch/crates/intrinsic-test/src/x86/xml_parser.rs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
index 157a37fc9df4d..90bafbee54353 100644
--- a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
@@ -84,7 +84,7 @@ fn xml_to_intrinsic(
         if ty.is_err() {
             None
         } else {
-            let constraint = map_constraints(&param.imm_type);
+            let constraint = map_constraints(&param.imm_type, param.imm_width);
             let arg = Argument::<X86IntrinsicType>::new(
                 i,
                 param.var_name.clone(),
@@ -117,11 +117,20 @@ fn xml_to_intrinsic(
         args[index].ty.bit_len = args[0].ty.bit_len;
     }
 
+    args.iter_mut().for_each(|arg| arg.ty.update_simd_len());
+
+    if name == "_mm_mpsadbw_epu8" {
+        args.iter_mut()
+            .filter(|arg| arg.name.contains("imm8"))
+            .for_each(|arg| arg.ty.bit_len = Some(3));
+    }
+
     let arguments = ArgumentList::<X86IntrinsicType> { args };
 
     if let Err(message) = result {
         return Err(Box::from(message));
     }
+
     Ok(Intrinsic {
         name,
         arguments,

From 4c0061e0b0b7792d10b55fe08ac8c0df6ee54284 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Thu, 25 Sep 2025 00:52:58 +0530
Subject: [PATCH 191/358] feat: defined more load functions that are not
 natively defined (such as arguments with UI16 etype and __m128d type)

---
 .../crates/intrinsic-test/src/x86/config.rs   | 143 ++++++++++++++++++
 .../crates/intrinsic-test/src/x86/types.rs    |  24 +++
 2 files changed, 167 insertions(+)

diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs
index a199a58ff2706..76317db5e4b0b 100644
--- a/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -7,6 +7,128 @@ pub const NOTICE: &str = "\
 pub const F16_FORMATTING_DEF: &str = r#"
 use std::arch::x86_64::*;
 
+#[inline]
+unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i {
+    _mm_castph_si128(_mm_loadu_ph(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i {
+    _mm256_castph_si256(_mm256_loadu_ph(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm512_loadu_ph_to___m512i(mem_addr: *const f16) -> __m512i {
+    _mm512_castph_si512(_mm512_loadu_ph(mem_addr))
+}
+
+
+#[inline]
+unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h {
+    _mm_castps_ph(_mm_loadu_ps(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h {
+    _mm256_castps_ph(_mm256_loadu_ps(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h {
+    _mm512_castps_ph(_mm512_loadu_ps(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d {
+    _mm_castsi128_pd(_mm_loadu_epi16(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d {
+    _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d {
+    _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d {
+    _mm_castsi128_pd(_mm_loadu_epi32(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d {
+    _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d {
+    _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d {
+    _mm_castsi128_pd(_mm_loadu_epi64(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d {
+    _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d {
+    _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr))
+}
+
+// ===
+#[inline]
+unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 {
+    _mm_castsi128_ps(_mm_loadu_epi16(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 {
+    _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 {
+    _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 {
+    _mm_castsi128_ps(_mm_loadu_epi32(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 {
+    _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 {
+    _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 {
+    _mm_castsi128_ps(_mm_loadu_epi64(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 {
+    _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr))
+}
+
+#[inline]
+unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 {
+    _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr))
+}
+
 #[inline]
 fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
     formatter: &mut core::fmt::Formatter<'_>,
     type_name: &str,
 
 impl DebugHexF16 for __m128h {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
         debug_simd_finish(f, "__m128h", &array)
     }
 }
 
+impl DebugHexF16 for __m128i {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
+        debug_simd_finish(f, "__m128i", &array)
+    }
+}
+
 impl DebugHexF16 for __m256h {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
         debug_simd_finish(f, "__m256h", &array)
     }
 }
 
+impl DebugHexF16 for __m256i {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
+        debug_simd_finish(f, "__m256i", &array)
+    }
+}
+
 impl DebugHexF16 for __m512h {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
         debug_simd_finish(f, "__m512h", &array)
     }
 }
+
+impl DebugHexF16 for __m512i {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
+        debug_simd_finish(f, "__m512i", &array)
+    }
+}
diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs
index dfaf2adaf4c25..e4b6e128761b3 100644
--- a/stdarch/crates/intrinsic-test/src/x86/types.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/types.rs
@@ -127,6 +127,30 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
         {
             let suffix = match (self.bit_len, self.kind) {
+                (Some(16), TypeKind::Float)
+                    if ["__m128i", "__m256i", "__m512i"]
+                        .contains(&self.param.type_data.as_str()) =>
+                {
+                    format!("ph_to_{}", self.param.type_data)
+                }
+                (Some(32), TypeKind::Float)
+                    if ["__m128h", "__m256h", "__m512h"]
+                        .contains(&self.param.type_data.as_str()) =>
+                {
+                    format!("ps_to_{}", self.param.type_data)
+                }
+                (Some(bit_len @ (16 | 32 | 64)), TypeKind::Int(_) | TypeKind::Mask)
+                    if ["__m128d", "__m256d", "__m512d"]
+                        .contains(&self.param.type_data.as_str()) =>
+                {
+                    format!("epi{bit_len}_to_{}", self.param.type_data)
+                }
+                (Some(bit_len @ (16 | 32 | 64)), TypeKind::Int(_) | TypeKind::Mask)
+                    if ["__m128", "__m256", "__m512"]
+                        .contains(&self.param.type_data.as_str()) =>
+                {
+                    format!("epi{bit_len}_to_{}", self.param.type_data)
+                }
                 (Some(bit_len @ (8 | 16 | 32 | 64)), TypeKind::Int(_)) => {
                     format!("epi{bit_len}")
                 }

From 5207be69befca659157db9b95a3aaff59d1fd110 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Thu, 25 Sep 2025 00:53:36 +0530
Subject: [PATCH 192/358] chore: corrected the imm-width correction location
 for the _mm_mpsadbw_epu8 intrinsic

---
 stdarch/crates/intrinsic-test/src/x86/xml_parser.rs | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
index 90bafbee54353..af85118b8aacd 100644
--- a/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
@@ -84,7 +84,12 @@ fn xml_to_intrinsic(
         if ty.is_err() {
             None
         } else {
-            let constraint = map_constraints(&param.imm_type, param.imm_width);
+            let effective_imm_width = if name == "_mm_mpsadbw_epu8" && param.var_name == "imm8" {
+                3
+            } else {
+                param.imm_width
+            };
+            let constraint = map_constraints(&param.imm_type, effective_imm_width);
             let arg = Argument::<X86IntrinsicType>::new(
                 i,
                 param.var_name.clone(),
@@ -119,12 +124,6 @@ fn xml_to_intrinsic(
 
     args.iter_mut().for_each(|arg| arg.ty.update_simd_len());
 
-    if name == "_mm_mpsadbw_epu8" {
-        args.iter_mut()
-            .filter(|arg| arg.name.contains("imm8"))
-            .for_each(|arg| arg.ty.bit_len = Some(3));
-    }
-
     let arguments = ArgumentList::<X86IntrinsicType> { args };
 
     if let Err(message) = result {

From 608e375edf608071e86abc7fc81f6834257f84bc Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Thu, 25 Sep 2025 00:57:01 +0530
Subject: [PATCH 193/358] feat: added exclusion list to intrinsic-test CI
 pipeline

---
 stdarch/ci/run.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/stdarch/ci/run.sh b/stdarch/ci/run.sh
index d8af9b76977bc..dc5a78723bfe2 100755
--- a/stdarch/ci/run.sh
+++ b/stdarch/ci/run.sh
@@ -93,6 +93,7 @@ case ${TARGET} in
         TEST_CPPFLAGS="-fuse-ld=lld -I/usr/include/x86_64-linux-gnu/"
         TEST_CXX_COMPILER="clang++-19"
         TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}"
+        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt
 
         export STDARCH_DISABLE_ASSERT_INSTR=1
         export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"

From 4443ec2c9cc259088f44be12eb719ef56494cee6 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Thu, 25 Sep 2025 01:01:40 +0530
Subject: [PATCH 194/358] chore: clean up unused variables

---
 stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
index 43a0e3f5d1633..c52bccb693d89 100644
--- a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
+++ b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
@@ -282,7 +282,7 @@ impl IntrinsicType {
             }
             IntrinsicType {
                 kind: TypeKind::Vector,
-                bit_len: Some(bit_len @ (128 | 256 | 512)),
+                bit_len: Some(128 | 256 | 512),
                 simd_len,
                 vec_len,
                 ..

From fbfe9dadf8dad6279ad9e54233575bc3d7587ccf Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Sat, 27 Sep 2025 15:29:54 +0530
Subject: [PATCH 195/358] feat: moved cast to architecture-specific
 definitions

---
 .../crates/intrinsic-test/src/arm/config.rs   |  8 ++++++++
 .../crates/intrinsic-test/src/common/gen_c.rs | 18 ------------------
 .../crates/intrinsic-test/src/x86/config.rs   | 13 +++++++++++++
 3 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/arm/config.rs b/stdarch/crates/intrinsic-test/src/arm/config.rs
index 46706e009eba1..daef7b9b0eee2 100644
--- a/stdarch/crates/intrinsic-test/src/arm/config.rs
+++ b/stdarch/crates/intrinsic-test/src/arm/config.rs
@@ -8,6 +8,14 @@ pub const POLY128_OSTREAM_DECL: &str = r#"
 std::ostream& operator<<(std::ostream& os, poly128_t value);
 std::ostream& operator<<(std::ostream& os, float16_t value);
 #endif
+
+// T1 is the `To` type, T2 is the `From` type
+template <typename T1, typename T2> T1 cast(T2 x) {{
+    static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
+    T1 ret{{}};
+    memcpy(&ret, &x, sizeof(T1));
+    return ret;
+}}
 "#;
 
 pub const POLY128_OSTREAM_DEF: &str = r#"
diff --git a/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/stdarch/crates/intrinsic-test/src/common/gen_c.rs
index 965e229da509f..04741e4f80757 100644
--- a/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@@ -122,24 +122,6 @@ pub fn write_mod_cpp(
 
     writeln!(w, "{}", forward_declarations)?;
 
-    writeln!(
-        w,
-        r#"
-// T1 is the `To` type, T2 is the `From` type
-template <typename T1, typename T2> T1 cast(T2 x) {{
-    if (std::is_convertible<T2, T1>::value) {{
-        return x;
-    }} else if (sizeof(T1) == sizeof(T2)) {{
-        T1 ret{{}};
-        memcpy(&ret, &x, sizeof(T1));
-        return ret;
-    }} else {{
-        assert("T2 must either be convertible to T1, or have the same size as T1!");
-    }}
-}}
-"#
-    )?;
-
     for intrinsic in intrinsics {
         create_c_test_function(w, intrinsic)?;
     }
diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs
index 76317db5e4b0b..cb4c99406698b 100644
--- a/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -269,6 +269,19 @@ std::ostream& operator<<(std::ostream& os, __m512i value) {
     return os;
 }
 
+// T1 is the `To` type, T2 is the `From` type
+template <typename T1, typename T2> T1 cast(T2 x) {{
+    if (std::is_convertible<T2, T1>::value) {{
+        return x;
+    }} else if (sizeof(T1) == sizeof(T2)) {{
+        T1 ret{{}};
+        memcpy(&ret, &x, sizeof(T1));
+        return ret;
+    }} else {{
+        assert("T2 must either be convertible to T1, or have the same size as T1!");
+    }}
+}}
+
 #define _mm512_extract_intrinsic_test_epi8(m, lane) \
     _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)

From 9d7e72ef24cc1d1f8159721515027ca0c1ac2c07 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Sat, 27 Sep 2025 16:27:13 +0530
Subject: [PATCH 196/358] fix: remove extra brackets for cast definition in
 arm/config.rs

---
 stdarch/crates/intrinsic-test/src/arm/config.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/arm/config.rs b/stdarch/crates/intrinsic-test/src/arm/config.rs
index daef7b9b0eee2..7421a6da38dde 100644
--- a/stdarch/crates/intrinsic-test/src/arm/config.rs
+++ b/stdarch/crates/intrinsic-test/src/arm/config.rs
@@ -10,12 +10,12 @@ std::ostream& operator<<(std::ostream& os, float16_t value);
 #endif
 
 // T1 is the `To` type, T2 is the `From` type
-template <typename T1, typename T2> T1 cast(T2 x) {{
+template <typename T1, typename T2> T1 cast(T2 x) {
     static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
-    T1 ret{{}};
+    T1 ret{};
     memcpy(&ret, &x, sizeof(T1));
     return ret;
-}}
+}
 "#;
 
 pub const POLY128_OSTREAM_DEF: &str = r#"

From 622b18f2bb4fa1c5a17621a8c276f7e6267c3c35 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Sat, 27 Sep 2025 16:34:08 +0530
Subject: [PATCH 197/358] make `std::ostream& operator<<(std::ostream& os,
 float16_t value);` definition available for armv7 also

---
 stdarch/crates/intrinsic-test/src/arm/config.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/arm/config.rs b/stdarch/crates/intrinsic-test/src/arm/config.rs
index 7421a6da38dde..354d8f50b43b8 100644
--- a/stdarch/crates/intrinsic-test/src/arm/config.rs
+++ b/stdarch/crates/intrinsic-test/src/arm/config.rs
@@ -6,9 +6,10 @@ pub const NOTICE: &str = "\
 pub const POLY128_OSTREAM_DECL: &str = r#"
 #ifdef __aarch64__
 std::ostream& operator<<(std::ostream& os, poly128_t value);
-std::ostream& operator<<(std::ostream& os, float16_t value);
 #endif
 
+std::ostream& operator<<(std::ostream& os, float16_t value);
+
 // T1 is the `To` type, T2 is the `From` type
 template <typename T1, typename T2> T1 cast(T2 x) {
     static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
@@ -33,6 +34,8 @@ std::ostream& operator<<(std::ostream& os, poly128_t value) {
     return os;
 }
 
+#endif
+
 std::ostream& operator<<(std::ostream& os, float16_t value) {
     uint16_t temp = 0;
     memcpy(&temp, &value, sizeof(float16_t));
@@ -41,7 +44,6 @@ std::ostream& operator<<(std::ostream& os, float16_t value) {
     os << ss.str();
     return os;
 }
-#endif
 "#;
 
 // Format f16 values (and vectors containing them) in a way that is consistent with C.
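Aside on the exclusion list added in the next patch: missing_x86.txt names one intrinsic per line, with '#' starting a comment. A minimal sketch of how such a file could be parsed (hypothetical helper, not the harness's actual loader):

    fn parse_skip_list(text: &str) -> Vec<String> {
        text.lines()
            .map(str::trim)
            .filter(|line| !line.is_empty() && !line.starts_with('#'))
            .map(String::from)
            .collect()
    }

    fn main() {
        let skips = parse_skip_list("# comment\n_mm_malloc\n\n_tpause\n");
        assert_eq!(skips, ["_mm_malloc", "_tpause"]);
    }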
From c0b979baf0c6490fc9bdc69459a15659daaf6690 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Sat, 27 Sep 2025 23:36:44 +0530 Subject: [PATCH 198/358] feat: add missing_x86.txt to filter out intrinsics that cannot be tested currently --- stdarch/crates/intrinsic-test/missing_x86.txt | 874 ++++++++++++++++++ 1 file changed, 874 insertions(+) create mode 100644 stdarch/crates/intrinsic-test/missing_x86.txt diff --git a/stdarch/crates/intrinsic-test/missing_x86.txt b/stdarch/crates/intrinsic-test/missing_x86.txt new file mode 100644 index 0000000000000..824d36f60528a --- /dev/null +++ b/stdarch/crates/intrinsic-test/missing_x86.txt @@ -0,0 +1,874 @@ +# Are defined under a similar name + +#__bswap_64 +_bswap64 + +# Provides pointer to allocated memory, which is difficult to test +_mm_malloc + +# requires target feature 'waitpkg', but would be inlined into function that is compiled without support for 'waitpkg' +_tpause +_umwait + +# `use of undeclared identifier` error in Clang +_bit_scan_forward +_bit_scan_reverse +_bswap +_castf32_u32 +_castf64_u64 +_castu32_f32 +_castu64_f64 +_lrotl +_lrotr +_may_i_use_cpu_feature +_may_i_use_cpu_feature_ext +_mm256_acos_pd +_mm256_acos_ph +_mm256_acos_ps +_mm256_acosh_pd +_mm256_acosh_ph +_mm256_acosh_ps +_mm256_asin_pd +_mm256_asin_ph +_mm256_asin_ps +_mm256_asinh_pd +_mm256_asinh_ph +_mm256_asinh_ps +_mm256_atan_pd +_mm256_atan_ps +_mm256_atan_ph +_mm256_atan2_pd +_mm256_atan2_ph +_mm256_atan2_ps +_mm256_atanh_pd +_mm256_atanh_ph +_mm256_atanh_ps +_mm256_cbrt_pd +_mm256_cbrt_ph +_mm256_cbrt_ps +_mm256_cdfnorm_pd +_mm256_cdfnorm_ph +_mm256_cdfnorm_ps +_mm256_cdfnorminv_pd +_mm256_cdfnorminv_ph +_mm256_cdfnorminv_ps +_mm256_cexp_ps +_mm256_cos_pd +_mm256_cos_ph +_mm256_cos_ps +_mm256_cosd_pd +_mm256_cosd_ph +_mm256_cosd_ps +_mm256_cosh_pd +_mm256_cosh_ph +_mm256_cosh_ps +_mm256_csqrt_ps +_mm256_div_epi16 +_mm256_div_epi32 +_mm256_div_epi64 +_mm256_div_epi8 +_mm256_div_epu16 +_mm256_div_epu32 +_mm256_div_epu64 +_mm256_div_epu8 +_mm256_dpbssd_epi32 +_mm256_dpbssds_epi32 +_mm256_dpbsud_epi32 +_mm256_dpbsuds_epi32 +_mm256_dpbuud_epi32 +_mm256_dpbuuds_epi32 +_mm256_dpwsud_epi32 +_mm256_dpwsuds_epi32 +_mm256_dpwusd_epi32 +_mm256_dpwusds_epi32 +_mm256_dpwuud_epi32 +_mm256_dpwuuds_epi32 +_mm256_erf_pd +_mm256_erf_ps +_mm256_erfc_pd +_mm256_erfc_ph +_mm256_erfc_ps +_mm256_erfcinv_pd +_mm256_erfcinv_ph +_mm256_erfcinv_ps +_mm256_erfinv_pd +_mm256_erfinv_ph +_mm256_erfinv_ps +_mm256_exp10_pd +_mm256_exp10_ph +_mm256_exp10_ps +_mm256_exp2_pd +_mm256_exp2_ph +_mm256_exp2_ps +_mm256_exp_pd +_mm256_exp_ph +_mm256_exp_ps +_mm256_expm1_pd +_mm256_expm1_ph +_mm256_expm1_ps +_mm256_hypot_pd +_mm256_hypot_ph +_mm256_hypot_ps +_mm256_idiv_epi32 +_mm256_invcbrt_pd +_mm256_invcbrt_ph +_mm256_invcbrt_ps +_mm256_invsqrt_pd +_mm256_invsqrt_ph +_mm256_invsqrt_ps +_mm256_irem_epi32 +_mm256_log10_pd +_mm256_log10_ph +_mm256_log10_ps +_mm256_log1p_pd +_mm256_log1p_ph +_mm256_log1p_ps +_mm256_log2_pd +_mm256_log2_ph +_mm256_log2_ps +_mm256_log_pd +_mm256_log_ph +_mm256_log_ps +_mm256_logb_pd +_mm256_logb_ph +_mm256_logb_ps +_mm256_clog_ps +_mm256_madd52hi_avx_epu64 +_mm256_madd52lo_avx_epu64 +_mm256_erf_ph +_mm256_mask_reduce_add_epi16 +_mm256_mask_reduce_add_epi8 +_mm256_mask_reduce_and_epi16 +_mm256_mask_reduce_and_epi8 +_mm256_mask_reduce_max_epi16 +_mm256_mask_reduce_max_epi8 +_mm256_mask_reduce_max_epu16 +_mm256_mask_reduce_max_epu8 +_mm256_mask_reduce_min_epi16 +_mm256_mask_reduce_min_epi8 +_mm256_mask_reduce_min_epu16 +_mm256_mask_reduce_min_epu8 
+_mm256_mask_reduce_mul_epi16 +_mm256_mask_reduce_mul_epi8 +_mm256_mask_reduce_or_epi16 +_mm256_mask_reduce_or_epi8 +_mm512_cosd_ph +_mm512_cosd_ps +_mm512_cosh_pd +_mm512_cosh_ph +_mm512_cosh_ps +_mm512_div_epi16 +_mm512_div_epi32 +_mm512_div_epi64 +_mm512_div_epi8 +_mm512_div_epu16 +_mm512_div_epu32 +_mm512_div_epu64 +_mm512_div_epu8 +_mm512_erf_pd +_mm512_erf_ph +_mm512_erf_ps +_mm512_erfc_pd +_mm512_erfc_ph +_mm512_erfc_ps +_mm512_erfcinv_pd +_mm512_erfcinv_ph +_mm512_erfcinv_ps +_mm512_erfinv_pd +_mm512_erfinv_ph +_mm512_erfinv_ps +_mm512_exp10_pd +_mm512_exp10_ph +_mm512_exp10_ps +_mm512_exp2_pd +_mm512_exp2_ph +_mm512_exp2_ps +_mm512_exp_pd +_mm512_exp_ph +_mm512_exp_ps +_mm512_expm1_pd +_mm512_expm1_ph +_mm512_expm1_ps +_mm512_floor_ph +_mm512_hypot_pd +_mm512_hypot_ph +_mm512_hypot_ps +_mm512_invsqrt_pd +_mm512_invsqrt_ph +_mm512_invsqrt_ps +_mm512_log10_pd +_mm512_log10_ph +_mm512_log10_ps +_mm512_log1p_pd +_mm512_log1p_ph +_mm512_log1p_ps +_mm512_log2_pd +_mm512_log2_ph +_mm512_log2_ps +_mm512_log_pd +_mm512_log_ph +_mm512_log_ps +_mm512_logb_pd +_mm512_logb_ph +_mm512_logb_ps +_mm512_mask_acos_pd +_mm512_mask_acos_ph +_mm512_mask_acos_ps +_mm512_mask_acosh_pd +_mm512_mask_acosh_ph +_mm512_mask_acosh_ps +_mm512_mask_asin_pd +_mm512_mask_asin_ph +_mm512_mask_asin_ps +_mm512_mask_asinh_pd +_mm512_mask_asinh_ph +_mm512_mask_asinh_ps +_mm512_mask_atan2_pd +_mm512_mask_atan2_ps +_mm512_mask_atan_pd +_mm512_mask_atan_ph +_mm512_mask_atan_ph +_mm512_mask_atanh_pd +_mm512_mask_atanh_ph +_mm512_mask_atanh_ps +_mm512_mask_cbrt_pd +_mm512_mask_cbrt_ph +_mm512_mask_cbrt_ps +_mm512_mask_cdfnorm_pd +_mm512_mask_cdfnorm_ph +_mm512_mask_cdfnorm_ps +_mm512_mask_cdfnorminv_pd +_mm512_mask_cdfnorminv_ph +_mm512_mask_cdfnorminv_ps +_mm512_mask_ceil_ph +_mm512_mask_cos_pd +_mm512_mask_cos_ph +_mm512_mask_cos_ps +_mm512_mask_cosd_pd +_mm512_mask_cosd_ph +_mm512_mask_cosd_ps +_mm512_mask_cosh_pd +_mm512_mask_cosh_ph +_mm512_mask_cosh_ps +_mm512_mask_atan_ps +_mm512_cosd_pd +_mm512_cos_ps +_mm512_cos_ph +_mm512_cos_pd +_mm512_mask_div_epi32 +_mm512_mask_div_epu32 +_mm512_mask_erf_pd +_mm512_mask_erf_ph +_mm512_mask_erf_ps +_mm512_mask_erfc_pd +_mm512_mask_erfc_ph +_mm512_mask_erfc_ps +_mm512_mask_erfcinv_pd +_mm512_mask_erfcinv_ph +_mm512_mask_erfcinv_ps +_mm512_mask_erfinv_pd +_mm512_mask_erfinv_ph +_mm512_mask_erfinv_ps +_mm512_mask_exp10_pd +_mm512_mask_exp10_ph +_mm512_mask_exp10_ps +_mm512_mask_exp2_pd +_mm512_mask_exp2_ph +_mm512_mask_exp2_ps +_mm512_mask_exp_pd +_mm512_mask_exp_ph +_mm512_mask_exp_ps +_mm512_mask_expm1_pd +_mm512_mask_expm1_ph +_mm512_mask_expm1_ps +_mm512_mask_floor_ph +_mm512_mask_hypot_pd +_mm512_mask_hypot_ps +_mm512_mask_invsqrt_pd +_mm512_mask_invsqrt_ph +_mm512_mask_invsqrt_ps +_mm512_mask_log10_pd +_mm512_mask_log10_ph +_mm512_mask_log10_ps +_mm512_mask_log1p_pd +_mm512_mask_log1p_ph +_mm512_mask_log1p_ps +_mm512_mask_log2_pd +_mm512_mask_log2_ph +_mm512_mask_log2_ps +_mm512_mask_log_pd +_mm512_mask_log_ph +_mm512_mask_log_ps +_mm512_mask_logb_pd +_mm512_mask_logb_ph +_mm512_mask_logb_ps +_mm512_mask_nearbyint_pd +_mm512_mask_nearbyint_ph +_mm512_mask_nearbyint_ps +_mm512_mask_pow_pd +_mm512_mask_pow_ps +_mm512_mask_recip_pd +_mm512_mask_recip_ph +_mm512_mask_recip_ps +_mm512_mask_rem_epi32 +_mm512_mask_rem_epu32 +_mm512_mask_rint_pd +_mm512_mask_rint_ph +_mm512_mask_rint_ps +_mm512_mask_sin_pd +_mm512_mask_sin_ph +_mm512_mask_sin_ps +_mm512_mask_sind_pd +_mm512_mask_sind_ph +_mm512_mask_sind_ps +_mm512_mask_sinh_pd +_mm512_mask_sinh_ph +_mm512_mask_sinh_ps 
+_mm512_mask_svml_round_pd +_mm512_mask_svml_round_ph +_mm512_mask_tan_pd +_mm512_mask_tan_ph +_mm512_mask_tan_ps +_mm512_mask_tand_pd +_mm512_mask_tand_ph +_mm512_mask_tand_ps +_mm512_mask_tanh_pd +_mm512_mask_tanh_ph +_mm512_mask_tanh_ps +_mm512_mask_trunc_pd +_mm512_mask_trunc_ph +_mm512_mask_trunc_ps +_mm512_nearbyint_pd +_mm512_nearbyint_ph +_mm512_nearbyint_ps +_mm512_pow_pd +_mm512_pow_ph +_mm512_pow_ps +_mm512_recip_pd +_mm512_recip_ph +_mm512_recip_ps +_mm512_rem_epi16 +_mm512_rem_epi32 +_mm512_rem_epi64 +_mm512_rem_epi8 +_mm512_rem_epu16 +_mm512_rem_epu32 +_mm512_rem_epu64 +_mm512_rem_epu8 +_mm512_rint_pd +_mm512_rint_ph +_mm512_rint_ps +_mm512_sin_pd +_mm512_sin_ph +_mm512_sin_ps +_mm512_sind_pd +_mm512_sind_ph +_mm512_sind_ps +_mm512_sinh_pd +_mm512_sinh_ph +_mm512_sinh_ps +_mm512_svml_round_pd +_mm512_svml_round_ph +_mm512_tan_pd +_mm512_tan_ph +_mm512_tan_ps +_mm512_tand_pd +_mm512_tand_ph +_mm512_tand_ps +_mm512_tanh_pd +_mm512_tanh_ph +_mm512_tanh_ps +_mm512_trunc_pd +_mm512_trunc_ph +_mm512_trunc_ps +_mm_acos_pd +_mm_acos_ph +_mm_acos_ps +_mm_acosh_pd +_mm_acosh_ph +_mm_acosh_ps +_mm_asin_pd +_mm_asin_ph +_mm_asin_ps +_mm_asinh_pd +_mm_asinh_ph +_mm_asinh_ps +_mm_atan2_pd +_mm_atan2_ph +_mm_atan2_ps +_mm_atan_pd +_mm_atan_ph +_mm_atan_ps +_mm_atanh_pd +_mm_atanh_ph +_mm_atanh_ps +_mm_cbrt_pd +_mm_cbrt_ph +_mm_cbrt_ps +_mm_cdfnorm_pd +_mm_cdfnorm_ph +_mm_cdfnorm_ps +_mm_cdfnorminv_pd +_mm_cdfnorminv_ph +_mm_cdfnorminv_ps +_mm_cexp_ps +_mm_clog_ps +_mm_cos_pd +_mm_cos_ph +_mm_cos_ps +_mm_cosd_pd +_mm_cosd_ph +_mm_cosd_ps +_mm_cosh_pd +_mm_cosh_ph +_mm_cosh_ps +_mm_csqrt_ps +_mm_cvtsd_si64x +_mm_cvtsi128_si64x +_mm_cvtsi64x_sd +_mm_cvtsi64x_si128 +_mm_cvttsd_si64x +_mm_div_epi16 +_mm_div_epi32 +_mm_div_epi64 +_mm_div_epi8 +_mm_div_epu16 +_mm_div_epu32 +_mm_div_epu64 +_mm_div_epu8 +_mm_dpbssd_epi32 +_mm_dpbssds_epi32 +_mm_dpbsud_epi32 +_mm_dpbsuds_epi32 +_mm_dpbuud_epi32 +_mm_dpbuuds_epi32 +_mm_dpwsud_epi32 +_mm_dpwsuds_epi32 +_mm_dpwusd_epi32 +_mm_dpwusds_epi32 +_mm_dpwuud_epi32 +_mm_dpwuuds_epi32 +_mm_erf_pd +_mm_erf_ph +_mm_erf_ps +_mm_erfc_pd +_mm_erfc_ph +_mm_erfc_ps +_mm_erfcinv_pd +_mm_erfcinv_ph +_mm_erfcinv_ps +_mm_erfinv_pd +_mm_erfinv_ph +_mm_erfinv_ps +_mm_exp10_pd +_mm_exp10_ph +_mm_exp10_ps +_mm_exp2_pd +_mm_exp2_ph +_mm_exp2_ps +_mm_exp_pd +_mm_exp_ph +_mm_exp_ps +_mm_expm1_pd +_mm_expm1_ph +_mm_expm1_ps +_mm_hypot_pd +_mm_hypot_ph +_mm_hypot_ps +_mm_idiv_epi32 +_mm_invcbrt_pd +_mm_invcbrt_ph +_mm_invcbrt_ps +_mm_invsqrt_pd +_mm_invsqrt_ph +_mm_invsqrt_ps +_mm_irem_epi32 +_mm_log10_pd +_mm_log10_ph +_mm_log10_ps +_mm_log1p_pd +_mm_log1p_ph +_mm_log1p_ps +_mm_log2_pd +_mm_log2_ph +_mm_log2_ps +_mm_log_pd +_mm_log_ph +_mm_log_ps +_mm_logb_pd +_mm_logb_ph +_mm_logb_ps +_mm_madd52hi_avx_epu64 +_mm_madd52lo_avx_epu64 +_mm_mask_reduce_add_epi16 +_mm_mask_reduce_add_epi8 +_mm_mask_reduce_and_epi16 +_mm_mask_reduce_and_epi8 +_mm_mask_reduce_max_epi16 +_mm_mask_reduce_max_epi8 +_mm_mask_reduce_max_epu16 +_mm_mask_reduce_max_epu8 +_mm_mask_reduce_min_epi16 +_mm_mask_reduce_min_epi8 +_mm_mask_reduce_min_epu16 +_mm_mask_reduce_min_epu8 +_mm_mask_reduce_mul_epi16 +_mm_mask_reduce_mul_epi8 +_mm_mask_reduce_or_epi16 +_mm_mask_reduce_or_epi8 +_mm_pow_pd +_mm_pow_ph +_mm_pow_ps +_mm_reduce_add_epi16 +_mm_reduce_add_epi8 +_mm_reduce_and_epi16 +_mm_reduce_and_epi8 +_mm_reduce_max_epi16 +_mm_reduce_max_epi8 +_mm_reduce_max_epu16 +_mm_reduce_max_epu8 +_mm_reduce_min_epi16 +_mm_reduce_min_epi8 +_mm_reduce_min_epu16 +_mm_reduce_min_epu8 +_mm_reduce_mul_epi16 +_mm_reduce_mul_epi8 
+_mm_reduce_or_epi16 +_mm_reduce_or_epi8 +_mm_rem_epi16 +_mm_rem_epi32 +_mm_rem_epi64 +_mm_rem_epi8 +_mm_rem_epu16 +_mm_rem_epu32 +_mm_rem_epu64 +_mm_rem_epu8 +_mm_sin_pd +_mm_sin_ph +_mm_sin_ps +_mm_sind_pd +_mm_sind_ph +_mm_sind_ps +_mm_sinh_pd +_mm_sinh_ph +_mm_sinh_ps +_mm_sm3msg1_epi32 +_mm_sm3msg2_epi32 +_mm_sm3rnds2_epi32 +_mm_sm4key4_epi32 +_mm_sm4rnds4_epi32 +_mm_svml_ceil_pd +_mm_svml_ceil_ph +_mm_svml_ceil_ps +_mm_svml_floor_pd +_mm_svml_floor_ph +_mm_svml_floor_ps +_mm_svml_round_pd +_mm_svml_round_ph +_mm_svml_round_ps +_mm_svml_sqrt_pd +_mm_svml_sqrt_ph +_mm_svml_sqrt_ps +_mm_tan_pd +_mm_tan_ph +_mm_tan_ps +_mm_tand_pd +_mm_tand_ph +_mm_tand_ps +_mm_tanh_pd +_mm_tanh_ph +_mm_tanh_ps +_mm_trunc_pd +_mm_trunc_ph +_mm_trunc_ps +_mm_udiv_epi32 +_mm_urem_epi32 +_popcnt32 +_popcnt64 +_rdpmc +_rotl +_rotl64 +_rotr +_rotr64 +_rotwl +_rotwr +_urdmsr + +# Cannot find value in this scope (in Rust testfiles) +_mm512_set1_pch +_mm_abs_pi16 +_mm_abs_pi32 +_mm_abs_pi8 +_mm_add_pi16 +_mm_add_pi32 +_mm_add_pi8 +_mm_add_si64 +_mm_adds_pi16 +_mm_adds_pi8 +_mm_adds_pu16 +_mm_adds_pu8 +_mm_alignr_pi8 +_mm_and_si64 +_mm_andnot_si64 +_mm_avg_pu16 +_mm_avg_pu8 +_mm_cmpeq_pi16 +_mm_cmpeq_pi32 +_mm_cmpeq_pi8 +_mm_cmpgt_pi16 +_mm_cmpgt_pi32 +_mm_cmpgt_pi8 +_mm_cvt_pi2ps +_mm_cvt_ps2pi +_mm_cvtm64_si64 +_mm_cvtpd_pi32 +_mm_cvtpi16_ps +_mm_cvtpi32_pd +_mm_cvtpi32_ps +_mm_cvtpi32x2_ps +_mm_cvtpi8_ps +_mm_cvtps_pi16 +_mm_cvtps_pi32 +_mm_cvtps_pi8 +_mm_cvtpu16_ps +_mm_cvtpu8_ps +_mm_cvtsi32_si64 +_mm_cvtsi64_m64 +_mm_cvtsi64_si32 +_mm_cvtt_ps2pi +_mm_cvttpd_pi32 +_mm512_cbrt_pd +_mm512_cbrt_ph +_mm512_cbrt_ps +_mm512_cdfnorm_pd +_mm512_cdfnorm_ph +_mm512_cdfnorm_ps +_mm512_cdfnorminv_pd +_mm512_cdfnorminv_ph +_mm512_cdfnorminv_ps +_mm512_ceil_pd +_mm512_ceil_ph +_mm512_ceil_ps +_mm512_floor_pd +_mm512_floor_ps +_mm512_mask_ceil_pd +_mm512_mask_ceil_ps +_mm_max_pi16 +_mm_max_pu8 +_mm_min_pi16 +_mm_min_pu8 +_mm_movemask_pi8 +_mm_movepi64_pi64 +_mm_movpi64_epi64 +_mm_mul_su32 +_mm_mulhi_pi16 +_mm_mulhi_pu16 +_mm_mulhrs_pi16 +_mm_mullo_pi16 +_mm_or_si64 +_mm_packs_pi16 +_mm_packs_pi32 +_mm_packs_pu16 +_mm_popcnt_u32 +_mm_popcnt_u64 +_mm_sad_pu8 +_mm_set1_epi64 +_mm_set1_pch +_mm_set1_pi16 +_mm_set1_pi32 +_mm_set1_pi8 +_mm_set_epi64 +_mm_set_pi16 +_mm_set_pi32 +_mm_set_pi8 +_mm_setr_epi64 +_mm_setr_pi16 +_mm_setr_pi32 +_mm_setr_pi8 +_mm_shuffle_pi16 +_mm_shuffle_pi8 +_mm_sign_pi16 +_mm_sign_pi32 +_mm_sign_pi8 +_mm_sll_pi16 +_mm_sll_pi32 +_mm_sll_si64 +_mm_slli_pi16 +_mm_slli_pi32 +_mm_slli_si64 +_mm_sra_pi16 +_mm_sra_pi32 +_mm_srai_pi16 +_mm_srai_pi32 +_mm_srl_pi16 +_mm_srl_pi32 +_mm_srl_si64 +_mm_srli_pi16 +_mm_srli_pi32 +_mm_srli_si64 +_mm_sub_pi16 +_mm_sub_pi32 +_mm_sub_pi8 +_mm_sub_si64 +_mm_subs_pi16 +_mm_subs_pi8 +_mm_subs_pu16 +_mm_subs_pu8 +_mm_unpackhi_pi16 +_mm_unpackhi_pi32 +_mm_unpackhi_pi8 +_mm_unpacklo_pi16 +_mm_unpacklo_pi32 +_mm_unpacklo_pi8 +_mm_xor_si64 +_mm256_pow_pd +_mm256_pow_ph +_mm256_pow_ps +_mm256_rem_epi16 +_mm256_rem_epi32 +_mm256_rem_epi64 +_mm256_rem_epi8 +_mm256_rem_epu16 +_mm256_rem_epu32 +_mm256_rem_epu64 +_mm256_rem_epu8 +_mm256_set1_pch +_mm256_sin_pd +_mm256_sin_ph +_mm256_sin_ps +_mm256_sind_pd +_mm256_sind_ph +_mm256_sind_ps +_mm256_sinh_pd +_mm256_sinh_ph +_mm256_sinh_ps +_mm256_svml_ceil_pd +_mm256_svml_ceil_ph +_mm256_svml_ceil_ps +_mm256_svml_floor_pd +_mm256_svml_floor_ph +_mm256_svml_floor_ps +_mm256_svml_round_pd +_mm256_svml_round_ph +_mm256_svml_round_ps +_mm256_svml_sqrt_pd +_mm256_svml_sqrt_ph +_mm256_svml_sqrt_ps +_mm256_tan_pd +_mm256_tan_ph +_mm256_tan_ps 
+_mm256_tand_pd +_mm256_tand_ph +_mm256_tand_ps +_mm256_tanh_pd +_mm256_tanh_ph +_mm256_tanh_ps +_mm256_trunc_pd +_mm256_trunc_ph +_mm256_trunc_ps +_mm256_udiv_epi32 +_mm256_urem_epi32 +_mm512_acos_pd +_mm512_acos_ph +_mm512_acos_ps +_mm512_acosh_pd +_mm512_acosh_ph +_mm512_acosh_ps +_mm_cvttps_pi32 +_mm_extract_pi16 +_mm_hadd_pi16 +_mm_hadd_pi32 +_mm_hadds_pi16 +_mm_hsub_pi16 +_mm_hsub_pi32 +_mm_hsubs_pi16 +_mm_insert_pi16 +_mm_madd_pi16 +_mm_maddubs_pi16 +_mm512_asin_pd +_mm512_asin_ph +_mm512_asin_ps +_mm512_asinh_pd +_mm512_asinh_ph +_mm512_asinh_ps +_mm512_atan2_pd +_mm512_atan2_ph +_mm512_atan2_ps +_mm512_atan_pd +_mm512_atan_ph +_mm512_atan_ps +_mm512_atanh_pd +_mm512_atanh_ph +_mm512_atanh_ps +_cvtsh_ss +_cvtss_sh +_m_from_int +_m_from_int64 +_m_packssdw +_m_packsswb +_m_packuswb +_m_paddb +_m_paddd +_m_paddsb +_m_paddsw +_m_paddusb +_m_paddusw +_m_paddw +_m_pand +_m_pandn +_m_pavgb +_m_pavgw +_m_pcmpeqb +_m_pcmpeqd +_m_pcmpeqw +_m_pcmpgtb +_m_pcmpgtd +_m_pcmpgtw +_m_pextrw +_m_pinsrw +_m_pmaddwd +_m_pmaxsw +_m_pmaxub +_m_pminsw +_m_pminub +_m_pmovmskb +_m_pmulhuw +_m_pmulhw +_m_pmullw +_m_por +_m_psadbw +_m_pshufw +_m_pslld +_m_pslldi +_m_psllq +_m_psllqi +_m_psllw +_m_psllwi +_m_psrad +_m_psradi +_m_psraw +_m_psrawi +_m_psrld +_m_psrldi +_m_psrlq +_m_psrlqi +_m_psrlw +_m_psrlwi +_m_psubb +_m_psubd +_m_psubsb +_m_psubsw +_m_psubusb +_m_psubusw +_m_psubw +_m_punpckhbw +_m_punpckhdq +_m_punpckhwd +_m_punpcklbw +_m_punpckldq +_m_punpcklwd +_m_pxor +_m_to_int +_m_to_int64 +_mm512_mask_floor_pd +_mm512_mask_floor_ps \ No newline at end of file From d9b51bbbe59171fffa14e529044c3a4de1b2dba2 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Sun, 28 Sep 2025 01:37:09 +0530 Subject: [PATCH 199/358] feat: added custom helper functions (that helped load intrinsic arguments in Rust) to C++ testfiles. 
Also added extra compilation flags
---
 .../crates/intrinsic-test/src/x86/compile.rs  |  2 +
 .../crates/intrinsic-test/src/x86/config.rs   | 51 ++++++++++++++++---
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/x86/compile.rs b/stdarch/crates/intrinsic-test/src/x86/compile.rs
index 6eaab8615059e..c0d2b12d0ef9f 100644
--- a/stdarch/crates/intrinsic-test/src/x86/compile.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/compile.rs
@@ -24,6 +24,8 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option {
         "-mavx512dq",
         "-mavx512cd",
         "-mavx512fp16",
+        "-msha512",
+        "-msm4",
         "-ferror-limit=1000",
     ]);
 
diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs
index cb4c99406698b..778448a3bd789 100644
--- a/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -270,17 +270,17 @@ std::ostream& operator<<(std::ostream& os, __m512i value) {
 }
 
 // T1 is the `To` type, T2 is the `From` type
-template <typename T1, typename T2> T1 cast(T2 x) {{
-    if (std::is_convertible<T2, T1>::value) {{
+template <typename T1, typename T2> T1 cast(T2 x) {
+    if (std::is_convertible<T2, T1>::value) {
         return x;
-    }} else if (sizeof(T1) == sizeof(T2)) {{
-        T1 ret{{}};
+    } else if (sizeof(T1) == sizeof(T2)) {
+        T1 ret{};
         memcpy(&ret, &x, sizeof(T1));
         return ret;
-    }} else {{
+    } else {
         assert("T2 must either be convertible to T1, or have the same size as T1!");
-    }}
-}}
+    }
+}
 
 #define _mm512_extract_intrinsic_test_epi8(m, lane) \
     _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)
@@ -299,6 +299,43 @@ template <typename T1, typename T2> T1 cast(T2 x) {{
 
 #define _mm64_extract_intrinsic_test_epi32(m, lane) \
     _mm_cvtsi64_si32(_mm_srli_si64(m, (lane) * 32))
+
+// Load f16 (__m128h) and cast to integer (__m128i)
+#define _mm_loadu_ph_to___m128i(mem_addr) _mm_castph_si128(_mm_loadu_ph(mem_addr))
+#define _mm256_loadu_ph_to___m256i(mem_addr) _mm256_castph_si256(_mm256_loadu_ph(mem_addr))
+#define _mm512_loadu_ph_to___m512i(mem_addr) _mm512_castph_si512(_mm512_loadu_ph(mem_addr))
+
+// Load f32 (__m128) and cast to f16 (__m128h)
+#define _mm_loadu_ps_to___m128h(mem_addr) _mm_castps_ph(_mm_loadu_ps(mem_addr))
+#define _mm256_loadu_ps_to___m256h(mem_addr) _mm256_castps_ph(_mm256_loadu_ps(mem_addr))
+#define _mm512_loadu_ps_to___m512h(mem_addr) _mm512_castps_ph(_mm512_loadu_ps(mem_addr))
+
+// Load integer types and cast to double (__m128d, __m256d, __m512d)
+#define _mm_loadu_epi16_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
+#define _mm256_loadu_epi16_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
+#define _mm512_loadu_epi16_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
+
+#define _mm_loadu_epi32_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
+#define _mm256_loadu_epi32_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
+#define _mm512_loadu_epi32_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
+
+#define _mm_loadu_epi64_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
+#define _mm256_loadu_epi64_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
+#define _mm512_loadu_epi64_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
+
+// Load integer types and cast to float (__m128, __m256, __m512)
+#define
_mm_loadu_epi16_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) +#define _mm256_loadu_epi16_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) +#define _mm512_loadu_epi16_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) + +#define _mm_loadu_epi32_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) +#define _mm256_loadu_epi32_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) +#define _mm512_loadu_epi32_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) + +#define _mm_loadu_epi64_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) +#define _mm256_loadu_epi64_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) +#define _mm512_loadu_epi64_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) + "#; pub const X86_CONFIGURATIONS: &str = r#" From 278da1be4e6f5ace6737f4ca00ccdb7cf86ac311 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Sun, 28 Sep 2025 13:24:06 +0530 Subject: [PATCH 200/358] chore: add more compiler flags for compiling x86 intrinsics in C++ --- stdarch/crates/intrinsic-test/src/x86/compile.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/stdarch/crates/intrinsic-test/src/x86/compile.rs b/stdarch/crates/intrinsic-test/src/x86/compile.rs index c0d2b12d0ef9f..27fd5d831c492 100644 --- a/stdarch/crates/intrinsic-test/src/x86/compile.rs +++ b/stdarch/crates/intrinsic-test/src/x86/compile.rs @@ -26,7 +26,15 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { "-mavx512fp16", "-msha512", "-msm4", + "-mavxvnni", + "-mavx512bitalg", + "-mavx512ifma", + "-mavx512vbmi", + "-mavx512vbmi2", + "-mavx512vnni", + "-mavx512vpopcntdq", "-ferror-limit=1000", + "-std=c++17", ]); if !cpp_compiler.contains("clang") { From 65e124b05d1ce1f05931795061739bb6c971c0bf Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Mon, 29 Sep 2025 00:13:53 +0530 Subject: [PATCH 201/358] chore: add verbose cli option to C++ compiler --- stdarch/crates/intrinsic-test/src/common/compile_c.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdarch/crates/intrinsic-test/src/common/compile_c.rs b/stdarch/crates/intrinsic-test/src/common/compile_c.rs index 258e41816583b..fa78b332a7857 100644 --- a/stdarch/crates/intrinsic-test/src/common/compile_c.rs +++ b/stdarch/crates/intrinsic-test/src/common/compile_c.rs @@ -119,7 +119,7 @@ impl CppCompilation { output: &str, ) -> std::io::Result { let mut cmd = clone_command(&self.0); - cmd.args([input, "-c", "-o", output]); + cmd.args([input, "-v", "-c", "-o", output]); cmd.output() } From f3d9853e6b3edeb5f95e1243d0ed983ae722a102 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Mon, 29 Sep 2025 01:23:15 +0530 Subject: [PATCH 202/358] feat: add clang to dockerfile and change clang++-19 to clang++ --- stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 4 +++- stdarch/ci/run.sh | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index bbebe2d7fa6b6..a35136a9c1904 100644 --- a/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -6,7 +6,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ make \ 
ca-certificates \ wget \ - xz-utils + xz-utils \ + clang \ + lld RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.58.0-2025-06-16-lin.tar.xz -O sde.tar.xz RUN mkdir intel-sde diff --git a/stdarch/ci/run.sh b/stdarch/ci/run.sh index dc5a78723bfe2..c8dc6a2e8b623 100755 --- a/stdarch/ci/run.sh +++ b/stdarch/ci/run.sh @@ -91,7 +91,7 @@ fi case ${TARGET} in x86_64-unknown-linux-gnu) TEST_CPPFLAGS="-fuse-ld=lld -I/usr/include/x86_64-linux-gnu/" - TEST_CXX_COMPILER="clang++-19" + TEST_CXX_COMPILER="clang++" TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt export STDARCH_DISABLE_ASSERT_INSTR=1 From a11f968b7bbbf462f67a6401617c50b2534deb27 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Mon, 29 Sep 2025 01:45:13 +0530 Subject: [PATCH 203/358] fix: add `libstdc++-dev` to fix `iostream not found` error --- stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index a35136a9c1904..2743896375cf3 100644 --- a/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -8,6 +8,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ xz-utils \ clang \ + libstdc++-14-dev \ + build-essential \ lld RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.58.0-2025-06-16-lin.tar.xz -O sde.tar.xz From 3b831290c487cb114e82a72b577bcbe3bbfe0f07 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Mon, 29 Sep 2025 16:22:37 +0530 Subject: [PATCH 204/358] fix: making compilation step run one by one to prevent the process from being killed. Also separated declarations and definitions for C++ testfiles. --- .../crates/intrinsic-test/src/common/mod.rs | 12 +- .../crates/intrinsic-test/src/x86/config.rs | 165 +++++++++--------- stdarch/crates/intrinsic-test/src/x86/mod.rs | 4 +- 3 files changed, 99 insertions(+), 82 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/common/mod.rs b/stdarch/crates/intrinsic-test/src/common/mod.rs index 37a48654e4ca3..f38f0e5a7c5d0 100644 --- a/stdarch/crates/intrinsic-test/src/common/mod.rs +++ b/stdarch/crates/intrinsic-test/src/common/mod.rs @@ -72,19 +72,29 @@ pub trait SupportedArchitectureTest { return Err(format!("Error writing to mod_{i}.cpp: {error:?}")); } + println!("Finished writing mod_{i}.cpp"); + + Ok(()) + }) + .collect::>() + .unwrap(); + + (0..chunk_count) + .map(|i| { // compile this cpp file into a .o file. 
// // This is done because `cpp_compiler_wrapped` is None when // the --generate-only flag is passed + println!("compiling mod_{i}.cpp"); if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { let compile_output = cpp_compiler .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o")); + println!("finished compiling mod_{i}.cpp"); if let Err(compile_error) = compile_output { return Err(format!("Error compiling mod_{i}.cpp: {compile_error:?}")); } } - Ok(()) }) .collect::>() diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index 778448a3bd789..159851c104f7e 100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -208,18 +208,93 @@ impl DebugHexF16 for __m512i { } "#; -pub const LANE_FUNCTION_HELPERS: &str = r#" -typedef _Float16 float16_t; -typedef float float32_t; -typedef double float64_t; - -#define __int64 long long -#define __int32 int +pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#" +#ifndef X86_DECLARATIONS +#define X86_DECLARATIONS + typedef _Float16 float16_t; + typedef float float32_t; + typedef double float64_t; + + #define __int64 long long + #define __int32 int -std::ostream& operator<<(std::ostream& os, _Float16 value); -std::ostream& operator<<(std::ostream& os, __m128i value); -std::ostream& operator<<(std::ostream& os, __m256i value); -std::ostream& operator<<(std::ostream& os, __m512i value); + std::ostream& operator<<(std::ostream& os, _Float16 value); + std::ostream& operator<<(std::ostream& os, __m128i value); + std::ostream& operator<<(std::ostream& os, __m256i value); + std::ostream& operator<<(std::ostream& os, __m512i value); + + #define _mm512_extract_intrinsic_test_epi8(m, lane) \ + _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16) + + #define _mm512_extract_intrinsic_test_epi16(m, lane) \ + _mm_extract_epi16(_mm512_extracti64x2_epi64((m), (lane) / 8), (lane) % 8) + + #define _mm512_extract_intrinsic_test_epi32(m, lane) \ + _mm_extract_epi32(_mm512_extracti64x2_epi64((m), (lane) / 4), (lane) % 4) + + #define _mm512_extract_intrinsic_test_epi64(m, lane) \ + _mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2) + + #define _mm64_extract_intrinsic_test_epi8(m, lane) \ + ((_mm_extract_pi16((m), (lane) / 2) >> (((lane) % 2) * 8)) & 0xFF) + + #define _mm64_extract_intrinsic_test_epi32(m, lane) \ + _mm_cvtsi64_si32(_mm_srli_si64(m, (lane) * 32)) + + // Load f16 (__m128h) and cast to integer (__m128i) + #define _mm_loadu_ph_to___m128i(mem_addr) _mm_castph_si128(_mm_loadu_ph(mem_addr)) + #define _mm256_loadu_ph_to___m256i(mem_addr) _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) + #define _mm512_loadu_ph_to___m512i(mem_addr) _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) + + // Load f32 (__m128) and cast to f16 (__m128h) + #define _mm_loadu_ps_to___m128h(mem_addr) _mm_castps_ph(_mm_loadu_ps(mem_addr)) + #define _mm256_loadu_ps_to___m256h(mem_addr) _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) + #define _mm512_loadu_ps_to___m512h(mem_addr) _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) + + // Load integer types and cast to double (__m128d, __m256d, __m512d) + #define _mm_loadu_epi16_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) + #define _mm256_loadu_epi16_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) + #define _mm512_loadu_epi16_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i 
const*)(mem_addr)))
+
+    #define _mm_loadu_epi32_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
+    #define _mm256_loadu_epi32_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
+    #define _mm512_loadu_epi32_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
+
+    #define _mm_loadu_epi64_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
+    #define _mm256_loadu_epi64_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
+    #define _mm512_loadu_epi64_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
+
+    // Load integer types and cast to float (__m128, __m256, __m512)
+    #define _mm_loadu_epi16_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr)))
+    #define _mm256_loadu_epi16_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr)))
+    #define _mm512_loadu_epi16_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr)))
+
+    #define _mm_loadu_epi32_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr)))
+    #define _mm256_loadu_epi32_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr)))
+    #define _mm512_loadu_epi32_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr)))
+
+    #define _mm_loadu_epi64_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr)))
+    #define _mm256_loadu_epi64_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr)))
+    #define _mm512_loadu_epi64_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr)))
+
+
+    // T1 is the `To` type, T2 is the `From` type
+    template <typename T1, typename T2> T1 cast(T2 x) {
+        if constexpr (std::is_convertible_v<T2, T1>) {
+            return x;
+        } else if constexpr (sizeof(T1) == sizeof(T2)) {
+            T1 ret{};
+            std::memcpy(&ret, &x, sizeof(T1));
+            return ret;
+        } else {
+            static_assert(sizeof(T1) == sizeof(T2) || std::is_convertible_v<T2, T1>,
+                          "T2 must either be convertible to T1, or have the same size as T1!");
+            return T1{};
+        }
+    }
+#endif
+"#;
 
+pub const PLATFORM_C_DEFINITIONS: &str = r#"
 std::ostream& operator<<(std::ostream& os, _Float16 value) {
     uint16_t temp = 0;
@@ -268,74 +343,6 @@ std::ostream& operator<<(std::ostream& os, __m512i value) {
     os << ss.str();
     return os;
 }
-
-// T1 is the `To` type, T2 is the `From` type
-template <typename T1, typename T2> T1 cast(T2 x) {
-    if (std::is_convertible<T2, T1>::value) {
-        return x;
-    } else if (sizeof(T1) == sizeof(T2)) {
-        T1 ret{};
-        memcpy(&ret, &x, sizeof(T1));
-        return ret;
-    } else {
-        assert("T2 must either be convertible to T1, or have the same size as T1!");
-    }
-}
-
-#define _mm512_extract_intrinsic_test_epi8(m, lane) \
-    _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)
-
-#define _mm512_extract_intrinsic_test_epi16(m, lane) \
-    _mm_extract_epi16(_mm512_extracti64x2_epi64((m), (lane) / 8), (lane) % 8)
-
-#define _mm512_extract_intrinsic_test_epi32(m, lane) \
-    _mm_extract_epi32(_mm512_extracti64x2_epi64((m), (lane) / 4), (lane) % 4)
-
-#define _mm512_extract_intrinsic_test_epi64(m, lane) \
-    _mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2)
-
-#define _mm64_extract_intrinsic_test_epi8(m, lane) \
-    ((_mm_extract_pi16((m), (lane) / 2) >> (((lane) % 2) * 8)) & 0xFF)
-
-#define _mm64_extract_intrinsic_test_epi32(m, lane) \
-    _mm_cvtsi64_si32(_mm_srli_si64(m,
(lane) * 32)) - -// Load f16 (__m128h) and cast to integer (__m128i) -#define _mm_loadu_ph_to___m128i(mem_addr) _mm_castph_si128(_mm_loadu_ph(mem_addr)) -#define _mm256_loadu_ph_to___m256i(mem_addr) _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) -#define _mm512_loadu_ph_to___m512i(mem_addr) _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) - -// Load f32 (__m128) and cast to f16 (__m128h) -#define _mm_loadu_ps_to___m128h(mem_addr) _mm_castps_ph(_mm_loadu_ps(mem_addr)) -#define _mm256_loadu_ps_to___m256h(mem_addr) _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) -#define _mm512_loadu_ps_to___m512h(mem_addr) _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) - -// Load integer types and cast to double (__m128d, __m256d, __m512d) -#define _mm_loadu_epi16_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) -#define _mm256_loadu_epi16_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) -#define _mm512_loadu_epi16_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - -#define _mm_loadu_epi32_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) -#define _mm256_loadu_epi32_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) -#define _mm512_loadu_epi32_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - -#define _mm_loadu_epi64_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) -#define _mm256_loadu_epi64_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) -#define _mm512_loadu_epi64_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - -// Load integer types and cast to float (__m128, __m256, __m512) -#define _mm_loadu_epi16_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) -#define _mm256_loadu_epi16_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) -#define _mm512_loadu_epi16_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - -#define _mm_loadu_epi32_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) -#define _mm256_loadu_epi32_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) -#define _mm512_loadu_epi32_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - -#define _mm_loadu_epi64_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) -#define _mm256_loadu_epi64_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) -#define _mm512_loadu_epi64_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - "#; pub const X86_CONFIGURATIONS: &str = r#" diff --git a/stdarch/crates/intrinsic-test/src/x86/mod.rs b/stdarch/crates/intrinsic-test/src/x86/mod.rs index d5ebd960b30c3..1eac6fb5f9306 100644 --- a/stdarch/crates/intrinsic-test/src/x86/mod.rs +++ b/stdarch/crates/intrinsic-test/src/x86/mod.rs @@ -37,8 +37,8 @@ impl SupportedArchitectureTest for X86ArchitectureTest { const NOTICE: &str = config::NOTICE; const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h", "cstddef", "cstdint"]; - const PLATFORM_C_DEFINITIONS: &str = config::LANE_FUNCTION_HELPERS; - const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::LANE_FUNCTION_HELPERS; + const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS; + const 
PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS;
     const PLATFORM_RUST_DEFINITIONS: &str = config::F16_FORMATTING_DEF;
     const PLATFORM_RUST_CFGS: &str = config::X86_CONFIGURATIONS;
 
From 71db5d1d78586e7787d1fb9dedc687d26d2ba092 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Tue, 30 Sep 2025 01:38:52 +0530
Subject: [PATCH 205/358] feat: attempting compilation of smaller chunks for
 faster parallel processing

---
 .../crates/intrinsic-test/src/common/mod.rs   | 23 ++++++++-----------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/common/mod.rs b/stdarch/crates/intrinsic-test/src/common/mod.rs
index f38f0e5a7c5d0..67bc81f99c881 100644
--- a/stdarch/crates/intrinsic-test/src/common/mod.rs
+++ b/stdarch/crates/intrinsic-test/src/common/mod.rs
@@ -49,7 +49,7 @@ pub trait SupportedArchitectureTest {
     fn cpp_compilation(&self) -> Option<CppCompilation>;
 
     fn build_c_file(&self) -> bool {
-        let (chunk_size, chunk_count) = chunk_info(self.intrinsics().len());
+        let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 100);
 
         let cpp_compiler_wrapped = self.cpp_compilation();
 
@@ -72,15 +72,6 @@ pub trait SupportedArchitectureTest {
                     return Err(format!("Error writing to mod_{i}.cpp: {error:?}"));
                 }
 
-                println!("Finished writing mod_{i}.cpp");
-
-                Ok(())
-            })
-            .collect::<Result<(), String>>()
-            .unwrap();
-
-        (0..chunk_count)
-            .map(|i| {
                 // compile this cpp file into a .o file.
                 //
                 // This is done because `cpp_compiler_wrapped` is None when
@@ -135,7 +126,7 @@ pub trait SupportedArchitectureTest {
     fn build_rust_file(&self) -> bool {
         std::fs::create_dir_all("rust_programs/src").unwrap();
 
-        let (chunk_size, chunk_count) = chunk_info(self.intrinsics().len());
+        let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 100);
 
         let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
         write_bin_cargo_toml(&mut cargo, chunk_count).unwrap();
@@ -205,9 +196,13 @@ pub trait SupportedArchitectureTest {
     }
 }
 
-pub fn chunk_info(intrinsic_count: usize) -> (usize, usize) {
-    let available_parallelism = std::thread::available_parallelism().unwrap().get();
-    let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count));
+// pub fn chunk_info(intrinsic_count: usize) -> (usize, usize) {
+//     let available_parallelism = std::thread::available_parallelism().unwrap().get();
+//     let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count));
 
+//     (chunk_size, intrinsic_count.div_ceil(chunk_size))
+// }
+pub fn manual_chunk(intrinsic_count: usize, chunk_size: usize) -> (usize, usize) {
     (chunk_size, intrinsic_count.div_ceil(chunk_size))
 }
 
From 14f1622e3f15a53f7be6857872eb22837b65431c Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Tue, 30 Sep 2025 14:32:14 +0530
Subject: [PATCH 206/358] feat: add c_programs to PATH and increase chunk size
 to 400

---
 stdarch/ci/run.sh                               | 2 ++
 stdarch/crates/intrinsic-test/src/common/mod.rs | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/stdarch/ci/run.sh b/stdarch/ci/run.sh
index c8dc6a2e8b623..a74769c56dba6 100755
--- a/stdarch/ci/run.sh
+++ b/stdarch/ci/run.sh
@@ -95,6 +95,8 @@ case ${TARGET} in
         TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}"
         TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt
         export STDARCH_DISABLE_ASSERT_INSTR=1
+        PATH="$PATH":"$(pwd)"/c_programs
+        export PATH
 
         export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
         cargo_test "${PROFILE}"
diff --git
a/stdarch/crates/intrinsic-test/src/common/mod.rs b/stdarch/crates/intrinsic-test/src/common/mod.rs
index 67bc81f99c881..86a7876807fd6 100644
--- a/stdarch/crates/intrinsic-test/src/common/mod.rs
+++ b/stdarch/crates/intrinsic-test/src/common/mod.rs
@@ -49,7 +49,7 @@ pub trait SupportedArchitectureTest {
     fn cpp_compilation(&self) -> Option<CppCompilation>;
 
     fn build_c_file(&self) -> bool {
-        let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 100);
+        let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400);
 
         let cpp_compiler_wrapped = self.cpp_compilation();
 
@@ -126,7 +126,7 @@ pub trait SupportedArchitectureTest {
     fn build_rust_file(&self) -> bool {
         std::fs::create_dir_all("rust_programs/src").unwrap();
 
-        let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 100);
+        let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400);
 
         let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
         write_bin_cargo_toml(&mut cargo, chunk_count).unwrap();

From ff7e93e695e940854bc221f89abc41e796b479ee Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Fri, 3 Oct 2025 01:17:03 +0530
Subject: [PATCH 207/358] feat: display __mmask8 values numerically so that
 non-UTF-8 bytes are not emitted

---
 stdarch/crates/intrinsic-test/src/x86/config.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs
index 159851c104f7e..2f859af4647c3 100644
--- a/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -222,6 +222,7 @@
     std::ostream& operator<<(std::ostream& os, __m128i value);
     std::ostream& operator<<(std::ostream& os, __m256i value);
     std::ostream& operator<<(std::ostream& os, __m512i value);
+    std::ostream& operator<<(std::ostream& os, __mmask8 value);
 
     #define _mm512_extract_intrinsic_test_epi8(m, lane) \
         _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)
@@ -343,6 +344,11 @@ std::ostream& operator<<(std::ostream& os, __m512i value) {
     os << ss.str();
     return os;
 }
+
+std::ostream& operator<<(std::ostream& os, __mmask8 value) {
+    os << static_cast<unsigned int>(value);
+    return os;
+}
 "#;
 
 pub const X86_CONFIGURATIONS: &str = r#"

From e89d0825b526e33fc7946915e47ff0ca01cad394 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Fri, 3 Oct 2025 18:39:53 +0530
Subject: [PATCH 208/358] feat: add formatting for __m128i, __m256i, __m512i
 types that is similar to the C++ version of the same.

---
 .../intrinsic-test/src/common/gen_rust.rs     |  3 +-
 .../intrinsic-test/src/common/intrinsic.rs    | 16 +------
 .../src/common/intrinsic_helpers.rs           | 18 ++++++++
 .../crates/intrinsic-test/src/x86/config.rs   | 44 ++++++++++++++++++-
 .../crates/intrinsic-test/src/x86/types.rs    | 12 +++++
 5 files changed, 75 insertions(+), 18 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
index 3b330879e05be..27f49a37b1cf6 100644
--- a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
@@ -4,7 +4,6 @@ use std::process::Command;
 use crate::common::intrinsic::Intrinsic;
 
 use super::indentation::Indentation;
-use super::intrinsic::format_f16_return_value;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
 // The number of times each intrinsic will be called.
@@ -233,7 +232,6 @@ pub fn generate_rust_test_loop( } } - let return_value = format_f16_return_value(intrinsic); let indentation2 = indentation.nested(); let indentation3 = indentation2.nested(); writeln!( @@ -250,6 +248,7 @@ pub fn generate_rust_test_loop( }}", loaded_args = intrinsic.arguments.load_values_rust(indentation3), args = intrinsic.arguments.as_call_param_rust(), + return_value = intrinsic.results.print_result_rust(), ) } diff --git a/stdarch/crates/intrinsic-test/src/common/intrinsic.rs b/stdarch/crates/intrinsic-test/src/common/intrinsic.rs index 95276d19b72f9..81f6d6d8b5b2c 100644 --- a/stdarch/crates/intrinsic-test/src/common/intrinsic.rs +++ b/stdarch/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,5 +1,5 @@ use super::argument::ArgumentList; -use super::intrinsic_helpers::{IntrinsicTypeDefinition, TypeKind}; +use super::intrinsic_helpers::IntrinsicTypeDefinition; /// An intrinsic #[derive(Debug, PartialEq, Clone)] @@ -16,17 +16,3 @@ pub struct Intrinsic { /// Any architecture-specific tags. pub arch_tags: Vec, } - -pub fn format_f16_return_value(intrinsic: &Intrinsic) -> String { - // the `intrinsic-test` crate compares the output of C and Rust intrinsics. Currently, It uses - // a string representation of the output value to compare. In C, f16 values are currently printed - // as hexadecimal integers. Since https://github.com/rust-lang/rust/pull/127013, rust does print - // them as decimal floating point values. To keep the intrinsics tests working, for now, format - // vectors containing f16 values like C prints them. - let return_value = match intrinsic.results.kind() { - TypeKind::Float if intrinsic.results.inner_size() == 16 => "debug_f16(__return_value)", - _ => "format_args!(\"{__return_value:.150?}\")", - }; - - String::from(return_value) -} diff --git a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs index c52bccb693d89..c0b9ed253539d 100644 --- a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -365,6 +365,24 @@ pub trait IntrinsicTypeDefinition: Deref { /// there is an int i in scope which is the current pass number. fn print_result_c(&self, indentation: Indentation, additional: &str) -> String; + /// Generates a std::cout for the intrinsics results that will match the + /// rust debug output format for the return type. The generated line assumes + /// there is an int i in scope which is the current pass number. + /// + /// The `intrinsic-test` crate compares the output of C and Rust intrinsics. Currently, It uses + /// a string representation of the output value to compare. In C, f16 values are currently printed + /// as hexadecimal integers. Since https://github.com/rust-lang/rust/pull/127013, rust does print + /// them as decimal floating point values. To keep the intrinsics tests working, for now, format + /// vectors containing f16 values like C prints them. 
+    fn print_result_rust(&self) -> String {
+        let return_value = match self.kind() {
+            TypeKind::Float if self.inner_size() == 16 => "debug_f16(__return_value)",
+            _ => "format_args!(\"{__return_value:.150?}\")",
+        };
+
+        String::from(return_value)
+    }
+
     /// To enable architecture-specific logic
     fn rust_scalar_type(&self) -> String {
         format!(
diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs
index 2f859af4647c3..bf2a37d78e91e 100644
--- a/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -206,7 +206,49 @@ impl DebugHexF16 for __m512i {
         debug_simd_finish(f, "__m512i", &array)
     }
 }
-"#;
+
+trait DebugI16 {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
+}
+
+impl DebugI16 for i16 {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "{}", self)
+    }
+}
+
+impl DebugI16 for __m128i {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let array = unsafe { core::mem::transmute::<_, [i16; 8]>(*self) };
+        debug_simd_finish(f, "__m128i", &array)
+    }
+}
+
+impl DebugI16 for __m256i {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let array = unsafe { core::mem::transmute::<_, [i16; 16]>(*self) };
+        debug_simd_finish(f, "__m256i", &array)
+    }
+}
+
+impl DebugI16 for __m512i {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let array = unsafe { core::mem::transmute::<_, [i16; 32]>(*self) };
+        debug_simd_finish(f, "__m512i", &array)
+    }
+}
+
+fn debug_i16<T: DebugI16>(x: T) -> impl core::fmt::Debug {
+    struct DebugWrapper<T>(T);
+    impl<T: DebugI16> core::fmt::Debug for DebugWrapper<T> {
+        fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+            self.0.fmt(f)
+        }
+    }
+    DebugWrapper(x)
+}
+
+"#;
 
 pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#"
 #ifndef X86_DECLARATIONS
diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs
index e4b6e128761b3..5631a01824442 100644
--- a/stdarch/crates/intrinsic-test/src/x86/types.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/types.rs
@@ -292,6 +292,18 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
         };
         format!("{prefix}{bits}")
     }
+
+    fn print_result_rust(&self) -> String {
+        let return_value = match self.kind() {
+            TypeKind::Float if self.inner_size() == 16 => "debug_f16(__return_value)",
+            _ if ["__m128i", "__m256i", "__m512i"].contains(&self.param.type_data.as_str()) => {
+                "debug_i16(__return_value)"
+            }
+            _ => "format_args!(\"{__return_value:.150?}\")",
+        };
+
+        String::from(return_value)
+    }
 }
 
 impl X86IntrinsicType {

From 2c632751da934300a6c35c69cd844c75f1bd41d8 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Mon, 6 Oct 2025 00:18:44 +0530
Subject: [PATCH 209/358] feat: make the debug_i16 into a generic debug_as
 function that adapts to base type

---
 .../crates/intrinsic-test/src/x86/config.rs   | 54 +++++++++----------
 .../crates/intrinsic-test/src/x86/types.rs    | 12 +++--
 2 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs
index bf2a37d78e91e..bf9f06640452c 100644
--- a/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -207,45 +207,45 @@ impl DebugHexF16 for __m512i {
     }
 }
 
-trait DebugI16 {
+trait DebugAs<T> {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
 }
 
-impl DebugI16 for i16 {
+impl<T: core::fmt::Display> DebugAs<T> for T {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         write!(f, "{}", self)
     }
 }
 
-impl DebugI16 for __m128i {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [i16; 8]>(*self) };
-        debug_simd_finish(f, "__m128i", &array)
-    }
-}
-
-impl DebugI16 for __m256i {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [i16; 16]>(*self) };
-        debug_simd_finish(f, "__m256i", &array)
-    }
-}
-
-impl DebugI16 for __m512i {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [i16; 32]>(*self) };
-        debug_simd_finish(f, "__m512i", &array)
-    }
-}
-
-fn debug_i16<T: DebugI16>(x: T) -> impl core::fmt::Debug {
-    struct DebugWrapper<T>(T);
-    impl<T: DebugI16> core::fmt::Debug for DebugWrapper<T> {
+macro_rules! impl_debug_as {
+    ($simd:ty, $name:expr, $bits:expr, [$($type:ty),+]) => {
+        $(
+            impl DebugAs<$type> for $simd {
+                fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+                    const ELEMENT_BITS: usize = core::mem::size_of::<$type>() * 8;
+                    const NUM_ELEMENTS: usize = $bits / ELEMENT_BITS;
+                    let array = unsafe { core::mem::transmute::<_, [$type; NUM_ELEMENTS]>(*self) };
+                    debug_simd_finish(f, $name, &array)
+                }
+            }
+        )+
+    };
+}
+
+impl_debug_as!(__m128i, "__m128i", 128, [u8, i8, u16, i16, u32, i32, u64, i64]);
+impl_debug_as!(__m256i, "__m256i", 256, [u8, i8, u16, i16, u32, i32, u64, i64]);
+impl_debug_as!(__m512i, "__m512i", 512, [u8, i8, u16, i16, u32, i32, u64, i64]);
+
+fn debug_as<T, V>(x: V) -> impl core::fmt::Debug
+where V: DebugAs<T>
+{
+    struct DebugWrapper<V, T>(V, core::marker::PhantomData<T>);
+    impl<V: DebugAs<T>, T> core::fmt::Debug for DebugWrapper<V, T> {
         fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
             self.0.fmt(f)
         }
     }
-    DebugWrapper(x)
+    DebugWrapper(x, core::marker::PhantomData)
 }
 
 "#;
diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs
index 5631a01824442..94600c989dc0d 100644
--- a/stdarch/crates/intrinsic-test/src/x86/types.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/types.rs
@@ -295,14 +295,16 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
 
     fn print_result_rust(&self) -> String {
         let return_value = match self.kind() {
-            TypeKind::Float if self.inner_size() == 16 => "debug_f16(__return_value)",
-            _ if ["__m128i", "__m256i", "__m512i"].contains(&self.param.type_data.as_str()) => {
-                "debug_i16(__return_value)"
+            TypeKind::Float if self.inner_size() == 16 => "debug_f16(__return_value)".to_string(),
+            TypeKind::Int(_)
+                if ["__m128i", "__m256i", "__m512i"].contains(&self.param.type_data.as_str()) =>
+            {
+                format!("debug_as::<_, u{}>(__return_value)", self.inner_size())
             }
-            _ => "format_args!(\"{__return_value:.150?}\")",
+            _ => "format_args!(\"{__return_value:.150?}\")".to_string(),
         };
 
-        String::from(return_value)
+        return_value
     }
 }
 
From b15c911d408da1f82e2963b612573dac455ae798 Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 8 Oct 2025 18:14:18 +0530
Subject: [PATCH 210/358] feat: casting the results of the lane function by
 preserving the bits instead of letting C++ do it (and potentially change the
 bits)

---
 .../crates/intrinsic-test/src/arm/types.rs    | 19 +++++-------
 .../src/common/intrinsic_helpers.rs           | 29 ++++++++++++-------
 .../crates/intrinsic-test/src/x86/types.rs    | 22 +++++---------
 3 files changed, 34 insertions(+), 36 deletions(-)

diff --git
a/stdarch/crates/intrinsic-test/src/arm/types.rs b/stdarch/crates/intrinsic-test/src/arm/types.rs index e86a2c5189f0b..c798cbe42d03f 100644 --- a/stdarch/crates/intrinsic-test/src/arm/types.rs +++ b/stdarch/crates/intrinsic-test/src/arm/types.rs @@ -112,12 +112,10 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { ty = self.c_single_vector_type(), lanes = (0..self.num_lanes()) .map(move |idx| -> std::string::String { + let lane_fn = self.get_lane_function(); + let final_cast = self.generate_final_type_cast(); format!( - "{cast}{lane_fn}(__return_value.val[{vector}], {lane})", - cast = self.c_promotion(), - lane_fn = self.get_lane_function(), - lane = idx, - vector = vector, + "{final_cast}{lane_fn}(__return_value.val[{vector}], {idx})" ) }) .collect::>() @@ -129,12 +127,9 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { } else if self.num_lanes() > 1 { (0..self.num_lanes()) .map(|idx| -> std::string::String { - format!( - "{cast}{lane_fn}(__return_value, {lane})", - cast = self.c_promotion(), - lane_fn = self.get_lane_function(), - lane = idx - ) + let lane_fn = self.get_lane_function(); + let final_cast = self.generate_final_type_cast(); + format!("{final_cast}{lane_fn}(__return_value, {idx})") }) .collect::>() .join(r#" << ", " << "#) @@ -150,7 +145,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { TypeKind::Poly => format!("poly{}_t", self.inner_size()), ty => todo!("print_result_c - Unknown type: {:#?}", ty), }, - promote = self.c_promotion(), + promote = self.generate_final_type_cast(), ) }; diff --git a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs index c0b9ed253539d..aa8613206ea0b 100644 --- a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -173,9 +173,9 @@ impl IntrinsicType { bit_len: Some(8), .. } => match kind { - TypeKind::Int(Sign::Signed) => "(int)", - TypeKind::Int(Sign::Unsigned) => "(unsigned int)", - TypeKind::Poly => "(unsigned int)(uint8_t)", + TypeKind::Int(Sign::Signed) => "int", + TypeKind::Int(Sign::Unsigned) => "unsigned int", + TypeKind::Poly => "uint8_t", _ => "", }, IntrinsicType { @@ -184,9 +184,9 @@ impl IntrinsicType { .. } => match bit_len { 8 => unreachable!("handled above"), - 16 => "(uint16_t)", - 32 => "(uint32_t)", - 64 => "(uint64_t)", + 16 => "uint16_t", + 32 => "uint32_t", + 64 => "uint64_t", 128 => "", _ => panic!("invalid bit_len"), }, @@ -195,16 +195,16 @@ impl IntrinsicType { bit_len: Some(bit_len), .. } => match bit_len { - 16 => "(float16_t)", - 32 => "(float)", - 64 => "(double)", + 16 => "float16_t", + 32 => "float", + 64 => "double", 128 => "", _ => panic!("invalid bit_len"), }, IntrinsicType { kind: TypeKind::Char(_), .. 
- } => "(char)", + } => "char", _ => "", } } @@ -391,4 +391,13 @@ pub trait IntrinsicTypeDefinition: Deref { bits = self.inner_size() ) } + + fn generate_final_type_cast(&self) -> String { + let type_data = self.c_promotion(); + if type_data.len() > 2 { + format!("({type_data})") + } else { + String::new() + } + } } diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index 94600c989dc0d..4ade0fa136fc8 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -185,7 +185,7 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { .map(move |idx| -> std::string::String { format!( "{cast}{lane_fn}(__return_value.val[{vector}], {lane})", - cast = self.c_promotion(), + cast = self.generate_final_type_cast(), lane_fn = self.get_lane_function(), lane = idx, vector = vector, @@ -200,12 +200,13 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { } else if self.num_lanes() > 1 { (0..self.num_lanes()) .map(|idx| -> std::string::String { - format!( - "{cast}{lane_fn}(__return_value, {lane})", - cast = self.c_promotion(), - lane_fn = self.get_lane_function(), - lane = idx - ) + let cast_type = self.c_promotion(); + let lane_fn = self.get_lane_function(); + if cast_type.len() > 2 { + format!("({cast_type})({lane_fn}(__return_value, {idx}))") + } else { + format!("{lane_fn}(__return_value, {idx})") + } }) .collect::>() .join(r#" << ", " << "#) @@ -224,13 +225,6 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { "__m{}i", self.bit_len.expect(format!("self: {:#?}", self).as_str()) ), - // TypeKind::Float if self.results().inner_size() == 16 => "float16_t".to_string(), - // TypeKind::Int(true) if self.results().inner_size() == 64 => "long".to_string(), - // TypeKind::Int(false) if self.results().inner_size() == 64 => "unsigned long".to_string(), - // TypeKind::Int(true) if self.results().inner_size() == 32 => "int".to_string(), - // TypeKind::Int(false) if self.results().inner_size() == 32 => "unsigned int".to_string(), - // TypeKind::Int(true) if self.results().inner_size() == 16 => "short".to_string(), - // TypeKind::Int(false) if self.results().inner_size() == 16 => "unsigned short".to_string(), _ => self.c_scalar_type(), }, promote = self.c_promotion(), From 45796cfd4a512403fc578a218b678d89ff2d86d4 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Wed, 8 Oct 2025 19:59:39 +0530 Subject: [PATCH 211/358] fix: update the display of uint8_t type in C++ --- stdarch/crates/intrinsic-test/src/arm/config.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/stdarch/crates/intrinsic-test/src/arm/config.rs b/stdarch/crates/intrinsic-test/src/arm/config.rs index 354d8f50b43b8..e2bc501127dd3 100644 --- a/stdarch/crates/intrinsic-test/src/arm/config.rs +++ b/stdarch/crates/intrinsic-test/src/arm/config.rs @@ -9,6 +9,7 @@ std::ostream& operator<<(std::ostream& os, poly128_t value); #endif std::ostream& operator<<(std::ostream& os, float16_t value); +std::ostream& operator<<(std::ostream& os, uint8_t value); // T1 is the `To` type, T2 is the `From` type template T1 cast(T2 x) { @@ -44,6 +45,11 @@ std::ostream& operator<<(std::ostream& os, float16_t value) { os << ss.str(); return os; } + +std::ostream& operator<<(std::ostream& os, uint8_t value) { + os << (unsigned int) value; + return os; +} "#; // Format f16 values (and vectors containing them) in a way that is consistent with C. 
From 4ba6d5689c0d76264db8397bc61abd267197080e Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 8 Oct 2025 22:48:37 +0530
Subject: [PATCH 212/358] Explicitly cast bits instead of allowing C++ to
 automatically cast the same (during typecasting)

---
 stdarch/crates/intrinsic-test/src/x86/types.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs
index 4ade0fa136fc8..f28d4d4988c89 100644
--- a/stdarch/crates/intrinsic-test/src/x86/types.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/types.rs
@@ -203,7 +203,7 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
                     let cast_type = self.c_promotion();
                     let lane_fn = self.get_lane_function();
                     if cast_type.len() > 2 {
-                        format!("({cast_type})({lane_fn}(__return_value, {idx}))")
+                        format!("cast<{cast_type}>({lane_fn}(__return_value, {idx}))")
                     } else {
                         format!("{lane_fn}(__return_value, {idx})")
                     }
@@ -227,7 +227,7 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
                 ),
                 _ => self.c_scalar_type(),
             },
-            promote = self.c_promotion(),
+            promote = self.generate_final_type_cast(),
         )
     };

From cbfdcb2a5bda43891c832267e91b9ce20c28c11f Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Thu, 9 Oct 2025 21:16:44 +0530
Subject: [PATCH 213/358] feat: update cast<> function to reduce spurious cast
 functions (cases like integer to float or vice versa)

---
 stdarch/crates/intrinsic-test/src/x86/config.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs
index bf9f06640452c..28c1a7d3ade26 100644
--- a/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -320,10 +320,9 @@ pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#"
     #define _mm256_loadu_epi64_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr)))
     #define _mm512_loadu_epi64_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr)))
 
-
     // T1 is the `To` type, T2 is the `From` type
     template <typename T1, typename T2> T1 cast(T2 x) {
-        if constexpr (std::is_convertible_v<T2, T1>) {
+        if constexpr ((std::is_integral_v<T1> && std::is_integral_v<T2>) || (std::is_floating_point_v<T1> && std::is_floating_point_v<T2>)) {
             return x;
         } else if constexpr (sizeof(T1) == sizeof(T2)) {
             T1 ret{};

From 6595708799986e2fb57f2dac6ea6bf764651631c Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Thu, 9 Oct 2025 21:19:35 +0530
Subject: [PATCH 214/358] feat: compile C++ testfiles using the C++23 standard

---
 stdarch/crates/intrinsic-test/src/x86/compile.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stdarch/crates/intrinsic-test/src/x86/compile.rs b/stdarch/crates/intrinsic-test/src/x86/compile.rs
index 27fd5d831c492..60997a1278a4d 100644
--- a/stdarch/crates/intrinsic-test/src/x86/compile.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/compile.rs
@@ -34,7 +34,7 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option {
         "-mavx512vnni",
         "-mavx512vpopcntdq",
         "-ferror-limit=1000",
-        "-std=c++17",
+        "-std=c++23",
     ]);
 
     if !cpp_compiler.contains("clang") {

From bf4d9dc04dca5a4a819d70a1b36936969046535f Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Fri, 10 Oct 2025 02:18:08 +0530
Subject: [PATCH 215/358] feat: allow downcasting (useful for cases where a
 uint32_t needs to be cast to float16_t because the bits are stored in the
 lower half of the type)

---
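To illustrate why relaxing the check to sizeof(T1) <= sizeof(T2) is sufficient, here is a
minimal standalone sketch of the same bit-preserving cast (simplified from the cast<>
helper above; it assumes a little-endian target, which holds for x86):

    #include <cstdint>
    #include <cstring>

    // Simplified shape of the helper: memcpy copies only sizeof(T1) bytes, so
    // when T1 is narrower than T2 the low bytes of x are reinterpreted as a T1.
    template <typename T1, typename T2> T1 cast(T2 x) {
        T1 ret{};
        std::memcpy(&ret, &x, sizeof(T1));
        return ret;
    }

    int main() {
        // 0x3C00 is the IEEE binary16 bit pattern of 1.0; lane extractors hand it
        // back widened into a uint32_t with the payload in the lower half.
        uint32_t bits = 0x3C00;
        _Float16 h = cast<_Float16>(bits); // reads the low 16 bits (little-endian)
        return h == (_Float16)1.0f ? 0 : 1;
    }
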
stdarch/crates/intrinsic-test/src/x86/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index 28c1a7d3ade26..6be3f1b133896 100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -324,7 +324,7 @@ pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#" template T1 cast(T2 x) { if constexpr ((std::is_integral_v && std::is_integral_v) || (std::is_floating_point_v && std::is_floating_point_v)) { return x; - } else if constexpr (sizeof(T1) == sizeof(T2)) { + } else if constexpr (sizeof(T1) <= sizeof(T2)) { T1 ret{}; std::memcpy(&ret, &x, sizeof(T1)); return ret; From c2c930a1c50c7291141a8a21d1e1d4249399b300 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Fri, 10 Oct 2025 12:49:30 +0530 Subject: [PATCH 216/358] feat: explicitly casting the result of the lane function to unsigned variants for compatibility with the Rust version --- .../crates/intrinsic-test/src/x86/types.rs | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index f28d4d4988c89..cdfc6bfa98279 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -250,21 +250,21 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { .and_then(|(simd_len, bit_len)| Some(simd_len * bit_len)); match (self.bit_len, total_vector_bits) { - (Some(8), Some(128)) => String::from("_mm_extract_epi8"), - (Some(16), Some(128)) => String::from("_mm_extract_epi16"), - (Some(32), Some(128)) => String::from("_mm_extract_epi32"), - (Some(64), Some(128)) => String::from("_mm_extract_epi64"), - (Some(8), Some(256)) => String::from("_mm256_extract_epi8"), - (Some(16), Some(256)) => String::from("_mm256_extract_epi16"), - (Some(32), Some(256)) => String::from("_mm256_extract_epi32"), - (Some(64), Some(256)) => String::from("_mm256_extract_epi64"), - (Some(8), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi8"), - (Some(16), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi16"), - (Some(32), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi32"), - (Some(64), Some(512)) => String::from("_mm512_extract_intrinsic_test_epi64"), - (Some(8), Some(64)) => String::from("_mm64_extract_intrinsic_test_epi8"), - (Some(16), Some(64)) => String::from("_mm_extract_pi16"), - (Some(32), Some(64)) => String::from("_mm64_extract_intrinsic_test_epi32"), + (Some(8), Some(128)) => String::from("(uint8_t)_mm_extract_epi8"), + (Some(16), Some(128)) => String::from("(uint16_t)_mm_extract_epi16"), + (Some(32), Some(128)) => String::from("(uint32_t)_mm_extract_epi32"), + (Some(64), Some(128)) => String::from("(uint64_t)_mm_extract_epi64"), + (Some(8), Some(256)) => String::from("(uint8_t)_mm256_extract_epi8"), + (Some(16), Some(256)) => String::from("(uint16_t)_mm256_extract_epi16"), + (Some(32), Some(256)) => String::from("(uint32_t)_mm256_extract_epi32"), + (Some(64), Some(256)) => String::from("(uint64_t)_mm256_extract_epi64"), + (Some(8), Some(512)) => String::from("(uint8_t)_mm512_extract_intrinsic_test_epi8"), + (Some(16), Some(512)) => String::from("(uint16_t)_mm512_extract_intrinsic_test_epi16"), + (Some(32), Some(512)) => String::from("(uint32_t)_mm512_extract_intrinsic_test_epi32"), + (Some(64), Some(512)) => 
String::from("(uint64_t)_mm512_extract_intrinsic_test_epi64"), + (Some(8), Some(64)) => String::from("(uint8_t)_mm64_extract_intrinsic_test_epi8"), + (Some(16), Some(64)) => String::from("(uint16_t)_mm_extract_pi16"), + (Some(32), Some(64)) => String::from("(uint32_t)_mm64_extract_intrinsic_test_epi32"), _ => unreachable!( "invalid length for vector argument: {:?}, {:?}", self.bit_len, self.simd_len From b15c911d408da1f82e2963b612573dac455ae798 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Sat, 11 Oct 2025 15:17:06 +0530 Subject: [PATCH 217/358] feat: updated exclusion list with more intrinsics, that can be fixed immediately --- stdarch/Cargo.lock | 77 +++++++++++++++++-- stdarch/ci/run.sh | 1 + stdarch/crates/intrinsic-test/Cargo.toml | 1 + stdarch/crates/intrinsic-test/missing_x86.txt | 32 +++++++- .../crates/intrinsic-test/src/common/cli.rs | 6 ++ .../intrinsic-test/src/common/compare.rs | 1 - stdarch/crates/intrinsic-test/src/x86/mod.rs | 13 +++- 7 files changed, 121 insertions(+), 10 deletions(-) diff --git a/stdarch/Cargo.lock b/stdarch/Cargo.lock index 70f09adf2c857..e198e14ffe178 100644 --- a/stdarch/Cargo.lock +++ b/stdarch/Cargo.lock @@ -282,6 +282,18 @@ dependencies = [ "wasi", ] +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -348,6 +360,7 @@ dependencies = [ "log", "pretty_env_logger", "quick-xml 0.37.5", + "rand 0.9.2", "rayon", "regex", "serde", @@ -473,7 +486,7 @@ checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" dependencies = [ "env_logger 0.8.4", "log", - "rand", + "rand 0.8.5", ] [[package]] @@ -485,6 +498,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "rand" version = "0.8.5" @@ -492,8 +511,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", ] [[package]] @@ -503,7 +532,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", ] [[package]] @@ -512,7 +551,16 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.16", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.4", ] [[package]] @@ -703,7 +751,7 @@ dependencies = [ name = "stdarch-gen-loongarch" version = "0.1.0" dependencies = [ - "rand", + "rand 0.8.5", ] [[package]] @@ -736,7 +784,7 @@ version = "0.0.0" dependencies = [ "core_arch", "quickcheck", - "rand", + "rand 0.8.5", ] [[package]] @@ -819,6 +867,15 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + [[package]] name = "wasmparser" version = "0.235.0" @@ -1003,6 +1060,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + [[package]] name = "xml-rs" version = "0.8.27" diff --git a/stdarch/ci/run.sh b/stdarch/ci/run.sh index a74769c56dba6..bd0e06687fa6a 100755 --- a/stdarch/ci/run.sh +++ b/stdarch/ci/run.sh @@ -94,6 +94,7 @@ case ${TARGET} in TEST_CXX_COMPILER="clang++" TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt + TEST_SAMPLE_INTRINSICS_PERCENTAGE=5 export STDARCH_DISABLE_ASSERT_INSTR=1 PATH="$PATH":"$(pwd)"/c_programs export PATH diff --git a/stdarch/crates/intrinsic-test/Cargo.toml b/stdarch/crates/intrinsic-test/Cargo.toml index 2c0f53897e797..9fb70f32f81bc 100644 --- a/stdarch/crates/intrinsic-test/Cargo.toml +++ b/stdarch/crates/intrinsic-test/Cargo.toml @@ -22,3 +22,4 @@ itertools = "0.14.0" quick-xml = { version = "0.37.5", features = ["serialize", "overlapped-lists"] } serde-xml-rs = "0.8.0" regex = "1.11.1" +rand = "0.9.2" diff --git a/stdarch/crates/intrinsic-test/missing_x86.txt b/stdarch/crates/intrinsic-test/missing_x86.txt index 824d36f60528a..e546799740c73 100644 --- a/stdarch/crates/intrinsic-test/missing_x86.txt +++ b/stdarch/crates/intrinsic-test/missing_x86.txt @@ -871,4 +871,34 @@ _m_pxor _m_to_int _m_to_int64 _mm512_mask_floor_pd -_mm512_mask_floor_ps \ No newline at end of file +_mm512_mask_floor_ps + +# SDE ERROR: Cannot execute XGETBV with ECX != 0 +_xgetbv + +# Miscellaneous issues that can be fixed first +_kshiftli_mask16 +_kshiftli_mask32 +_kshiftli_mask64 +_kshiftli_mask8 +_kshiftri_mask16 +_kshiftri_mask32 +_kshiftri_mask64 +_kshiftri_mask8 +_mm256_castsi128_si256 +_mm256_extract_epi16 +_mm256_extract_epi8 +_mm512_castsi128_si512 +_mm512_castsi256_si512 +_mm512_conj_pch +_mm512_mask_reduce_max_pd +_mm512_mask_reduce_max_ps +_mm512_mask_reduce_min_pd +_mm512_mask_reduce_min_ps +_mm_comineq_sh +_mm_extract_epi16 +_mm_extract_epi8 +_mm_mask_cvtepi16_epi8 +_mm_mask_cvtpd_epi32 +_mm_mask_cvtpd_ps +_mm_ucomineq_sh \ No newline at end of file diff --git a/stdarch/crates/intrinsic-test/src/common/cli.rs b/stdarch/crates/intrinsic-test/src/common/cli.rs index beae6a4b044da..461ab542ea897 100644 --- a/stdarch/crates/intrinsic-test/src/common/cli.rs +++ b/stdarch/crates/intrinsic-test/src/common/cli.rs @@ -54,6 +54,9 @@ pub struct Cli { /// Set the sysroot for the 
     #[arg(long)]
     pub cxx_toolchain_dir: Option<String>,
+
+    #[arg(long, default_value_t = 100u8)]
+    pub sample_percentage: u8,
 }
 
 pub struct ProcessedCli {
@@ -65,6 +68,7 @@ pub struct ProcessedCli {
     pub linker: Option<String>,
     pub cxx_toolchain_dir: Option<String>,
     pub skip: Vec<String>,
+    pub sample_percentage: u8,
 }
 
 impl ProcessedCli {
@@ -74,6 +78,7 @@ impl ProcessedCli {
         let target = cli_options.target;
         let linker = cli_options.linker;
         let cxx_toolchain_dir = cli_options.cxx_toolchain_dir;
+        let sample_percentage = cli_options.sample_percentage;
 
         let skip = if let Some(filename) = cli_options.skip {
             let data = std::fs::read_to_string(&filename).expect("Failed to open file");
@@ -108,6 +113,7 @@ impl ProcessedCli {
             cxx_toolchain_dir,
             skip,
             filename,
+            sample_percentage,
         }
     }
 }
diff --git a/stdarch/crates/intrinsic-test/src/common/compare.rs b/stdarch/crates/intrinsic-test/src/common/compare.rs
index 1ad00839ef026..89e5f965bc8e9 100644
--- a/stdarch/crates/intrinsic-test/src/common/compare.rs
+++ b/stdarch/crates/intrinsic-test/src/common/compare.rs
@@ -14,7 +14,6 @@ pub fn compare_outputs(intrinsic_name_list: &Vec<String>, runner: &str, target:
     let intrinsics = intrinsic_name_list
         .par_iter()
         .filter_map(|intrinsic_name| {
-
             let c = runner_command(runner)
                 .arg("intrinsic-test-programs")
                 .arg(intrinsic_name)
diff --git a/stdarch/crates/intrinsic-test/src/x86/mod.rs b/stdarch/crates/intrinsic-test/src/x86/mod.rs
index 1eac6fb5f9306..ca5748e5fb0c5 100644
--- a/stdarch/crates/intrinsic-test/src/x86/mod.rs
+++ b/stdarch/crates/intrinsic-test/src/x86/mod.rs
@@ -12,6 +12,8 @@ use crate::common::intrinsic::Intrinsic;
 use crate::common::intrinsic_helpers::TypeKind;
 use intrinsic::X86IntrinsicType;
 use itertools::Itertools;
+use rand::rng;
+use rand::seq::IndexedRandom;
 use xml_parser::get_xml_intrinsics;
 
 pub struct X86ArchitectureTest {
@@ -47,7 +49,10 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
         let intrinsics =
             get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file");
 
-        let mut intrinsics = intrinsics
+        let mut rng = rng();
+        let sample_percentage: usize = cli_options.sample_percentage as usize;
+
+        let intrinsics = intrinsics
             .into_iter()
             // Not sure how we would compare intrinsic that returns void.
             .filter(|i| i.results.kind() != TypeKind::Void)
@@ -62,6 +67,12 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
             .unique_by(|i| i.name.clone())
             .collect::<Vec<_>>();
 
+        let sample_size = (intrinsics.len() * sample_percentage) / 100;
+        let mut intrinsics = intrinsics
+            .choose_multiple(&mut rng, sample_size)
+            .cloned()
+            .collect::<Vec<_>>();
+
         intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
 
         Self {
             intrinsics: intrinsics,

From ec013740e933379bf35e641e4a58874edbfc68da Mon Sep 17 00:00:00 2001
From: Madhav Madhusoodanan
Date: Wed, 15 Oct 2025 14:12:49 +0530
Subject: [PATCH 218/358] chore: remove x86-intel.xml from `stdarch-verify`
 crate

---
That's the file we downloaded // here. - let xml = include_bytes!("../x86-intel.xml"); + let xml = include_bytes!("../../../intrinsics_data/x86-intel.xml"); let xml = &xml[..]; let data: Data = quick_xml::de::from_reader(xml).expect("failed to deserialize xml"); diff --git a/stdarch/crates/stdarch-verify/x86-intel.xml b/stdarch/crates/stdarch-verify/x86-intel.xml deleted file mode 100644 index 41f2119e681f9..0000000000000 --- a/stdarch/crates/stdarch-verify/x86-intel.xml +++ /dev/null @@ -1,158422 +0,0 @@ - - - - - - - - Add unsigned 32-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry or overflow flag), and store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag). - -tmp[32:0] := a[31:0] + b[31:0] + (c_in > 0 ? 1 : 0) -MEM[out+31:out] := tmp[31:0] -dst[0] := tmp[32] -dst[7:1] := 0 - - - - ADX -
immintrin.h
- Arithmetic -
- - - - - - - Add unsigned 64-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry or overflow flag), and store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag). - -tmp[64:0] := a[63:0] + b[63:0] + (c_in > 0 ? 1 : 0) -MEM[out+63:out] := tmp[63:0] -dst[0] := tmp[64] -dst[7:1] := 0 - - - - ADX -
immintrin.h
- Arithmetic -
- - - - - Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst"." - a[127:0] := ShiftRows(a[127:0]) -a[127:0] := SubBytes(a[127:0]) -a[127:0] := MixColumns(a[127:0]) -dst[127:0] := a[127:0] XOR RoundKey[127:0] - - - AES -
wmmintrin.h
- Cryptography -
- - - - - Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst"." - a[127:0] := ShiftRows(a[127:0]) -a[127:0] := SubBytes(a[127:0]) -dst[127:0] := a[127:0] XOR RoundKey[127:0] - - - AES -
wmmintrin.h
- Cryptography -
- - - - - Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst". - a[127:0] := InvShiftRows(a[127:0]) -a[127:0] := InvSubBytes(a[127:0]) -a[127:0] := InvMixColumns(a[127:0]) -dst[127:0] := a[127:0] XOR RoundKey[127:0] - - - AES -
wmmintrin.h
- Cryptography -
- - - - - Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst". - a[127:0] := InvShiftRows(a[127:0]) -a[127:0] := InvSubBytes(a[127:0]) -dst[127:0] := a[127:0] XOR RoundKey[127:0] - - - AES -
wmmintrin.h
- Cryptography -
- - - - Perform the InvMixColumns transformation on "a" and store the result in "dst". - dst[127:0] := InvMixColumns(a[127:0]) - - - AES -
wmmintrin.h
- Cryptography -
- - - - - Assist in expanding the AES cipher key by computing steps towards generating a round key for encryption cipher using data from "a" and an 8-bit round constant specified in "imm8", and store the result in "dst"." - X3[31:0] := a[127:96] -X2[31:0] := a[95:64] -X1[31:0] := a[63:32] -X0[31:0] := a[31:0] -RCON[31:0] := ZeroExtend32(imm8[7:0]) -dst[31:0] := SubWord(X1) -dst[63:32] := RotWord(SubWord(X1)) XOR RCON -dst[95:64] := SubWord(X3) -dst[127:96] := RotWord(SubWord(X3)) XOR RCON - - - AES -
wmmintrin.h
- Cryptography -
- - - - - - - - Compute dot-product of BF16 (16-bit) floating-point pairs in tiles "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". - FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (a.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.fp32[n] += FP32(a.row[m].bf16[2*k+0]) * FP32(b.row[k].bf16[2*n+0]) - tmp.fp32[n] += FP32(a.row[m].bf16[2*k+1]) * FP32(b.row[k].bf16[2*n+1]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - AMX-BF16 -
immintrin.h
- Application-Targeted -
- - - Compute dot-product of BF16 (16-bit) floating-point pairs in tiles "src0" and "src1", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. - - FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (src0.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.fp32[n] += FP32(src0.row[m].bf16[2*k+0]) * FP32(src1.row[k].bf16[2*n+0]) - tmp.fp32[n] += FP32(src0.row[m].bf16[2*k+1]) * FP32(src1.row[k].bf16[2*n+1]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - - - AMX-BF16 -
immintrin.h
- Application-Targeted -
- - - - - - - Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles "a" and "b" is interpreted as a complex number with FP16 real part and FP16 imaginary part. Calculates the imaginary part of the result. For each possible combination of (row of "a", column of "b"), it performs a set of multiplication and accumulations on all corresponding complex numbers (one from "a" and one from "b"). The imaginary part of the "a" element is multiplied with the real part of the corresponding "b" element, and the real part of the "a" element is multiplied with the imaginary part of the corresponding "b" elements. The two accumulated results are added, and then accumulated into the corresponding row and column of "dst". - FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (a.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) - tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - AMX-COMPLEX -
immintrin.h
- Application-Targeted -
- - - - - - Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles "a" and "b" is interpreted as a complex number with FP16 real part and FP16 imaginary part. Calculates the real part of the result. For each possible combination of (row of "a", column of "b"), it performs a set of multiplication and accumulations on all corresponding complex numbers (one from "a" and one from "b"). The real part of the "a" element is multiplied with the real part of the corresponding "b" element, and the negated imaginary part of the "a" element is multiplied with the imaginary part of the corresponding "b" elements. The two accumulated results are added, and then accumulated into the corresponding row and column of "dst". - FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (a.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) - tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - AMX-COMPLEX -
immintrin.h
- Application-Targeted -
- - - Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles "src0" and "src1" is interpreted as a complex number with FP16 real part and FP16 imaginary part. This function calculates the imaginary part of the result. - - FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (src0.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+0]) * FP32(src1.row[k].fp16[2*n+1]) - tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+1]) * FP32(src1.row[k].fp16[2*n+0]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - - - AMX-COMPLEX -
immintrin.h
- Application-Targeted -
- - - Perform matrix multiplication of two tiles containing complex elements and accumulate the results into a packed single precision tile. Each dword element in input tiles src0 and src1 is interpreted as a complex number with FP16 real part and FP16 imaginary part. This function calculates the real part of the result. - - FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (src0.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+0]) * FP32(src1.row[k].fp16[2*n+0]) - tmp.fp32[n] += FP32(-src0.row[m].fp16[2*k+1]) * FP32(src1.row[k].fp16[2*n+1]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - - - AMX-COMPLEX -
immintrin.h
- Application-Targeted -
- - - - - - - Compute dot-product of FP16 (16-bit) floating-point pairs in tiles "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". - FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (a.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) - tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - AMX-FP16 -
immintrin.h
- Application-Targeted -
- - - Compute dot-product of FP16 (16-bit) floating-point pairs in tiles "src0" and "src1", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. - - FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (src0.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+0]) * FP32(src1.row[k].fp16[2*n+0]) - tmp.fp32[n] += FP32(src0.row[m].fp16[2*k+1]) * FP32(src1.row[k].fp16[2*n+1]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - - - AMX-FP16 -
immintrin.h
- Application-Targeted -
- - - - - - - Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "a" with corresponding unsigned 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". - DEFINE DPBD(c, x, y) { - tmp1 := SignExtend32(x.byte[0]) * ZeroExtend32(y.byte[0]) - tmp2 := SignExtend32(x.byte[1]) * ZeroExtend32(y.byte[1]) - tmp3 := SignExtend32(x.byte[2]) * ZeroExtend32(y.byte[2]) - tmp4 := SignExtend32(x.byte[3]) * ZeroExtend32(y.byte[3]) - - RETURN c + tmp1 + tmp2 + tmp3 + tmp4 -} -FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (a.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - AMX-INT8 -
immintrin.h
- Application-Targeted -
- - - - - - Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". - DEFINE DPBD(c, x, y) { - tmp1 := ZeroExtend32(x.byte[0]) * SignExtend32(y.byte[0]) - tmp2 := ZeroExtend32(x.byte[1]) * SignExtend32(y.byte[1]) - tmp3 := ZeroExtend32(x.byte[2]) * SignExtend32(y.byte[2]) - tmp4 := ZeroExtend32(x.byte[3]) * SignExtend32(y.byte[3]) - - RETURN c + tmp1 + tmp2 + tmp3 + tmp4 -} -FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (a.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - AMX-INT8 -
immintrin.h
- Application-Targeted -
- - - - - - Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding unsigned 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". - DEFINE DPBD(c, x, y) { - tmp1 := ZeroExtend32(x.byte[0]) * ZeroExtend32(y.byte[0]) - tmp2 := ZeroExtend32(x.byte[1]) * ZeroExtend32(y.byte[1]) - tmp3 := ZeroExtend32(x.byte[2]) * ZeroExtend32(y.byte[2]) - tmp4 := ZeroExtend32(x.byte[3]) * ZeroExtend32(y.byte[3]) - - RETURN c + tmp1 + tmp2 + tmp3 + tmp4 -} -FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (a.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - AMX-INT8 -
immintrin.h
- Application-Targeted -
- - - - - - Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". - DEFINE DPBD(c, x, y) { - tmp1 := SignExtend32(x.byte[0]) * SignExtend32(y.byte[0]) - tmp2 := SignExtend32(x.byte[1]) * SignExtend32(y.byte[1]) - tmp3 := SignExtend32(x.byte[2]) * SignExtend32(y.byte[2]) - tmp4 := SignExtend32(x.byte[3]) * SignExtend32(y.byte[3]) - - RETURN c + tmp1 + tmp2 + tmp3 + tmp4 -} -FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (a.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - AMX-INT8 -
immintrin.h
- Application-Targeted -
- - - Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "src0" with corresponding signed 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. - - DEFINE DPBD(c, x, y) { - tmp1 := SignExtend32(x.byte[0]) * SignExtend32(y.byte[0]) - tmp2 := SignExtend32(x.byte[1]) * SignExtend32(y.byte[1]) - tmp3 := SignExtend32(x.byte[2]) * SignExtend32(y.byte[2]) - tmp4 := SignExtend32(x.byte[3]) * SignExtend32(y.byte[3]) - RETURN c + tmp1 + tmp2 + tmp3 + tmp4 -} -FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (src0.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - - - AMX-INT8 -
immintrin.h
- Application-Targeted -
- - - Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "src0" with corresponding unsigned 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. - - DEFINE DPBD(c, x, y) { - tmp1 := SignExtend32(x.byte[0]) * ZeroExtend32(y.byte[0]) - tmp2 := SignExtend32(x.byte[1]) * ZeroExtend32(y.byte[1]) - tmp3 := SignExtend32(x.byte[2]) * ZeroExtend32(y.byte[2]) - tmp4 := SignExtend32(x.byte[3]) * ZeroExtend32(y.byte[3]) - RETURN c + tmp1 + tmp2 + tmp3 + tmp4 -} -FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (src0.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - - - AMX-INT8 -
immintrin.h
- Application-Targeted -
- - - Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "src0" with corresponding signed 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. - - DEFINE DPBD(c, x, y) { - tmp1 := ZeroExtend32(x.byte[0]) * SignExtend32(y.byte[0]) - tmp2 := ZeroExtend32(x.byte[1]) * SignExtend32(y.byte[1]) - tmp3 := ZeroExtend32(x.byte[2]) * SignExtend32(y.byte[2]) - tmp4 := ZeroExtend32(x.byte[3]) * SignExtend32(y.byte[3]) - RETURN c + tmp1 + tmp2 + tmp3 + tmp4 -} -FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (src0.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - - - AMX-INT8 -
immintrin.h
- Application-Targeted -
- - - Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "src0" with corresponding unsigned 8-bit integers in "src1", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. - - DEFINE DPBD(c, x, y) { - tmp1 := ZeroExtend32(x.byte[0]) * ZeroExtend32(y.byte[0]) - tmp2 := ZeroExtend32(x.byte[1]) * ZeroExtend32(y.byte[1]) - tmp3 := ZeroExtend32(x.byte[2]) * ZeroExtend32(y.byte[2]) - tmp4 := ZeroExtend32(x.byte[3]) * ZeroExtend32(y.byte[3]) - RETURN c + tmp1 + tmp2 + tmp3 + tmp4 -} -FOR m := 0 TO dst.rows - 1 - tmp := dst.row[m] - FOR k := 0 TO (src0.colsb / 4) - 1 - FOR n := 0 TO (dst.colsb / 4) - 1 - tmp.dword[n] := DPBD(tmp.dword[n], src0.row[m].dword[k], src1.row[k].dword[n]) - ENDFOR - ENDFOR - write_row_and_zero(dst, m, tmp, dst.colsb) -ENDFOR -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - - - AMX-INT8 -
immintrin.h
- Application-Targeted -
- - - - - Load tile configuration from a 64-byte memory location specified by "mem_addr". The tile configuration format is specified below, and includes the tile type pallette, the number of bytes per row, and the number of rows. If the specified pallette_id is zero, that signifies the init state for both the tile config and the tile data, and the tiles are zeroed. Any invalid configurations will result in #GP fault. - -// format of memory payload. each field is a byte. -// 0: palette -// 1: start_row -// 2-15: reserved, must be zero -// 16-17: tile0.colsb -// 18-19: tile1.colsb -// 20-21: tile2.colsb -// ... -// 30-31: tile7.colsb -// 32-47: reserved, must be zero -// 48: tile0.rows -// 49: tile1.rows -// 50: tile2.rows -// ... -// 55: tile7.rows -// 56-63: reserved, must be zero - - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - - Stores the current tile configuration to a 64-byte memory location specified by "mem_addr". The tile configuration format is specified below, and includes the tile type pallette, the number of bytes per row, and the number of rows. If tiles are not configured, all zeroes will be stored to memory. - -// format of memory payload. each field is a byte. -// 0: palette -// 1: start_row -// 2-15: reserved, must be zero -// 16-17: tile0.colsb -// 18-19: tile1.colsb -// 20-21: tile2.colsb -// ... -// 30-31: tile7.colsb -// 32-47: reserved, must be zero -// 48: tile0.rows -// 49: tile1.rows -// 50: tile2.rows -// ... -// 55: tile7.rows -// 56-63: reserved, must be zero - - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - - - - Load tile rows from memory specifieid by "base" address and "stride" into destination tile "dst" using the tile configuration previously configured via "_tile_loadconfig". - start := tileconfig.startRow -IF start == 0 // not restarting, zero incoming state - tilezero(dst) -FI -nbytes := dst.colsb -DO WHILE start < dst.rows - memptr := base + start * stride - write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes) - start := start + 1 -OD -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - - - - Load tile rows from memory specifieid by "base" address and "stride" into destination tile "dst" using the tile configuration previously configured via "_tile_loadconfig". This intrinsic provides a hint to the implementation that the data will likely not be reused in the near future and the data caching can be optimized accordingly. - start := tileconfig.startRow -IF start == 0 // not restarting, zero incoming state - tilezero(dst) -FI -nbytes := dst.colsb -DO WHILE start < dst.rows - memptr := base + start * stride - write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes) - start := start + 1 -OD -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - Release the tile configuration to return to the init state, which releases all storage it currently holds. - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - - - - Store the tile specified by "src" to memory specifieid by "base" address and "stride" using the tile configuration previously configured via "_tile_loadconfig". - start := tileconfig.startRow -DO WHILE start < src.rows - memptr := base + start * stride - write_memory(memptr, src.colsb, src.row[start]) - start := start + 1 -OD -zero_tileconfig_start() - - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - - Zero the tile specified by "tdest". - nbytes := palette_table[tileconfig.palette_id].bytes_per_row -FOR i := 0 TO palette_table[tileconfig.palette_id].max_rows-1 - FOR j := 0 TO nbytes-1 - tdest.row[i].byte[j] := 0 - ENDFOR -ENDFOR - - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - Load tile rows from memory specifieid by "base" address and "stride" into destination tile "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. - - start := tileconfig.startRow -IF start == 0 // not restarting, zero incoming state - tilezero(dst) -FI -nbytes := dst.colsb -DO WHILE start < dst.rows - memptr := base + start * stride - write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes) - start := start + 1 -OD -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - Store the tile specified by "src" to memory specifieid by "base" address and "stride". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. - - start := tileconfig.startRow -DO WHILE start < src.rows - memptr := base + start * stride - write_memory(memptr, src.colsb, src.row[start]) - start := start + 1 -OD -zero_tileconfig_start() - - - - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - Load tile rows from memory specifieid by "base" address and "stride" into destination tile "dst". This intrinsic provides a hint to the implementation that the data will likely not be reused in the near future and the data caching can be optimized accordingly. The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. - - start := tileconfig.startRow -IF start == 0 // not restarting, zero incoming state - tilezero(dst) -FI -nbytes := dst.colsb -DO WHILE start < dst.rows - memptr := base + start * stride - write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes) - start := start + 1 -OD -zero_upper_rows(dst, dst.rows) -zero_tileconfig_start() - - - - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - Zero the tile specified by "dst". The shape of tile is specified in the struct of __tile1024i. The register of the tile is allocated by compiler. - - nbytes := palette_table[tileconfig.palette_id].bytes_per_row -FOR i := 0 TO palette_table[tileconfig.palette_id].max_rows-1 - FOR j := 0 TO nbytes-1 - tdest.row[i].byte[j] := 0 - ENDFOR -ENDFOR - - - AMX-TILE -
immintrin.h
- Application-Targeted -
- - - - - Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ACOS(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ACOS(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ACOSH(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ACOSH(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ASIN(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ASIN(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ASINH(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ASINH(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ATAN(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ATAN(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - - Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - - Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ATANH(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ATANH(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := COS(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := COS(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := COSD(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := COSD(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := COSH(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := COSH(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - - Compute the length of the hypotenous of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0)) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - - Compute the length of the hypotenous of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0)) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := SIN(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := SIN(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - - Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := SIN(a[i+63:i]) - MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - - Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := SIN(a[i+31:i]) - MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := SIND(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := SIND(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := SINH(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := SINH(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := TAN(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := TAN(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := TAND(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := TAND(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := TANH(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := TANH(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Trigonometry -
- - - - Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := CubeRoot(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := CubeRoot(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of "e" raised to the power of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". - -DEFINE CEXP(a[31:0], b[31:0]) { - result[31:0] := POW(FP32(e), a[31:0]) * COS(b[31:0]) - result[63:32] := POW(FP32(e), a[31:0]) * SIN(b[31:0]) - RETURN result -} -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := CEXP(a[i+31:i], a[i+63:i+32]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the natural logarithm of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". - -DEFINE CLOG(a[31:0], b[31:0]) { - result[31:0] := LOG(SQRT(POW(a, 2.0) + POW(b, 2.0))) - result[63:32] := ATAN2(b, a) - RETURN result -} -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := CLOG(a[i+31:i], a[i+63:i+32]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed complex snumbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". - -DEFINE CSQRT(a[31:0], b[31:0]) { - sign[31:0] := (b < 0.0) ? -FP32(1.0) : FP32(1.0) - result[31:0] := SQRT((a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0) - result[63:32] := sign * SQRT((-a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0) - RETURN result -} -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := CSQRT(a[i+31:i], a[i+63:i+32]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := POW(e, a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := POW(FP32(e), a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := POW(10.0, a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := POW(FP32(10.0), a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := POW(2.0, a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := POW(FP32(2.0), a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := POW(e, a[i+63:i]) - 1.0 -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0 -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the inverse cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := InvCubeRoot(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the inverse cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := InvCubeRoot(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := InvSQRT(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := InvSQRT(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := LOG(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := LOG(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := LOG(1.0 + a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := LOG(1.0 + a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ConvertExpFP64(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ConvertExpFP32(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := POW(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := POW(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_pd". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := SQRT(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm256_sqrt_ps". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := SQRT(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := CDFNormal(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := CDFNormal(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := InverseCDFNormal(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := InverseCDFNormal(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ERF(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ERF(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
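A short sketch of the error-function entries, assuming the SVML name _mm256_erf_ps; it also illustrates that erf is odd, so negating the input negates the result:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_set_ps(3.0f, 2.0f, 1.0f, 0.5f, 0.0f, -0.5f, -1.0f, -2.0f);
    __m256 r = _mm256_erf_ps(a);    // erf(-x) == -erf(x); erf(3) is close to 1
    float out[8];
    _mm256_storeu_ps(out, r);
    for (int i = 0; i < 8; i++) printf("erf -> %f\n", out[i]);
    return 0;
}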
- - - - Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := 1.0 - ERF(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := 1.0 - ERF(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := InverseERF(1.0 - a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := InverseERF(1.0 - a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := InverseERF(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := InverseERF(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Probability/Statistics -
- - - - - Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 31 - i := 8*j - IF b[i+7:i] == 0 - #DE - FI - dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 15 - i := 16*j - IF b[i+15:i] == 0 - #DE - FI - dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 7 - i := 32*j - IF b[i+31:i] == 0 - #DE - FI - dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
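There is no AVX divide instruction for packed integers; these entries are SVML library routines. Assuming the conventional SVML name _mm256_div_epi32, note that the quotient is truncated toward zero, exactly like C's / operator:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi32(-7);
    __m256i b = _mm256_set1_epi32(2);
    __m256i q = _mm256_div_epi32(a, b);        // every lane: -7 / 2 == -3 (truncated)
    int out[8];
    _mm256_storeu_si256((__m256i *)out, q);
    printf("-7 / 2 = %d\n", out[0]);
    return 0;
}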
- - - - - Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 3 - i := 64*j - IF b[i+63:i] == 0 - #DE - FI - dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 31 - i := 8*j - IF b[i+7:i] == 0 - #DE - FI - dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 15 - i := 16*j - IF b[i+15:i] == 0 - #DE - FI - dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 7 - i := 32*j - IF b[i+31:i] == 0 - #DE - FI - dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 3 - i := 64*j - IF b[i+63:i] == 0 - #DE - FI - dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - - Divide packed 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed 32-bit integers into memory at "mem_addr". - FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) - MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". - FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 8-bit integers in "dst". - FOR j := 0 to 31 - i := 8*j - dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 16-bit integers in "dst". - FOR j := 0 to 15 - i := 16*j - dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". - FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 64-bit integers in "dst". - FOR j := 0 to 3 - i := 64*j - dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 8-bit integers in "dst". - FOR j := 0 to 31 - i := 8*j - dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 16-bit integers in "dst". - FOR j := 0 to 15 - i := 16*j - dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". - FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 64-bit integers in "dst". - FOR j := 0 to 3 - i := 64*j - dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed unsigned 32-bit integers into memory at "mem_addr". - FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) - MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". - FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Arithmetic -
- - - - Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := CEIL(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Special Math Functions -
- - - - Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := CEIL(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Special Math Functions -
- - - - Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := FLOOR(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Special Math Functions -
- - - - Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := FLOOR(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Special Math Functions -
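Unlike the SVML entries, the ceiling/floor operations map directly onto vroundpd/vroundps with a fixed immediate; _mm256_ceil_pd and _mm256_floor_pd are the standard immintrin.h names. A small sketch showing the behaviour on negative inputs:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_set_pd(2.5, -2.5, 1.1, -1.1);   // low-to-high: -1.1, 1.1, -2.5, 2.5
    double c[4], f[4];
    _mm256_storeu_pd(c, _mm256_ceil_pd(a));    // -1.0, 2.0, -2.0, 3.0
    _mm256_storeu_pd(f, _mm256_floor_pd(a));   // -2.0, 1.0, -3.0, 2.0
    printf("ceil(-1.1)=%.1f floor(-1.1)=%.1f\n", c[0], f[0]);
    return 0;
}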
- - - - Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ROUND(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Special Math Functions -
- - - - Round the packed single-precision (32-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ROUND(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Special Math Functions -
- - - - Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. - FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := TRUNCATE(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Miscellaneous -
- - - - Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. - FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := TRUNCATE(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Miscellaneous -
- - - - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[i+63:i] + b[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[i+31:i] + b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
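These map one-to-one onto vaddpd/vaddps; the standard immintrin.h names are _mm256_add_pd and _mm256_add_ps. Minimal usage:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_set1_pd(1.5);
    __m256d b = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);
    double out[4];
    _mm256_storeu_pd(out, _mm256_add_pd(a, b));   // 2.5, 3.5, 4.5, 5.5
    printf("%.1f %.1f %.1f %.1f\n", out[0], out[1], out[2], out[3]);
    return 0;
}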
- - - - - Alternately add and subtract packed double-precision (64-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF ((j & 1) == 0) - dst[i+63:i] := a[i+63:i] - b[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] + b[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Alternately add and subtract packed single-precision (32-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF ((j & 1) == 0) - dst[i+31:i] := a[i+31:i] - b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] + b[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 3 - i := 64*j - dst[i+63:i] := a[i+63:i] / b[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := a[i+31:i] / b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - - Conditionally multiply the packed single-precision (32-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8". - -DEFINE DP(a[127:0], b[127:0], imm8[7:0]) { - FOR j := 0 to 3 - i := j*32 - IF imm8[(4+j)%8] - temp[i+31:i] := a[i+31:i] * b[i+31:i] - ELSE - temp[i+31:i] := FP32(0.0) - FI - ENDFOR - - sum[31:0] := (temp[127:96] + temp[95:64]) + (temp[63:32] + temp[31:0]) - - FOR j := 0 to 3 - i := j*32 - IF imm8[j%8] - tmpdst[i+31:i] := sum[31:0] - ELSE - tmpdst[i+31:i] := FP32(0.0) - FI - ENDFOR - RETURN tmpdst[127:0] -} -dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0]) -dst[255:128] := DP(a[255:128], b[255:128], imm8[7:0]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
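The imm8 mask of the dot-product operation above is easy to misread: the high four bits select which products enter the sum, the low four bits select which destination elements receive it, and the whole computation runs independently in each 128-bit lane. Sketch using the standard name _mm256_dp_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_set1_ps(1.0f);
    __m256 b = _mm256_set_ps(8, 7, 6, 5, 4, 3, 2, 1);   // low-to-high: 1..8
    // 0xF1 = sum all four products per lane, write the sum to lane element 0.
    __m256 d = _mm256_dp_ps(a, b, 0xF1);
    float out[8];
    _mm256_storeu_ps(out, d);
    printf("low lane: %.0f, high lane: %.0f\n", out[0], out[4]);  // 10 and 26
    return 0;
}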
- - - - - Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". - -dst[63:0] := a[127:64] + a[63:0] -dst[127:64] := b[127:64] + b[63:0] -dst[191:128] := a[255:192] + a[191:128] -dst[255:192] := b[255:192] + b[191:128] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". - -dst[31:0] := a[63:32] + a[31:0] -dst[63:32] := a[127:96] + a[95:64] -dst[95:64] := b[63:32] + b[31:0] -dst[127:96] := b[127:96] + b[95:64] -dst[159:128] := a[191:160] + a[159:128] -dst[191:160] := a[255:224] + a[223:192] -dst[223:192] := b[191:160] + b[159:128] -dst[255:224] := b[255:224] + b[223:192] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
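Note the interleaved output order above: within each 128-bit lane the lower results come from "a" and the upper results from "b", so one horizontal add does not reduce a whole 256-bit register. Illustration with the standard name _mm256_hadd_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_set_ps(8, 7, 6, 5, 4, 3, 2, 1);   // low-to-high: 1..8
    __m256 b = _mm256_setzero_ps();
    float out[8];
    _mm256_storeu_ps(out, _mm256_hadd_ps(a, b));
    // out = {1+2, 3+4, 0, 0, 5+6, 7+8, 0, 0}; the two lanes never mix
    printf("%.0f %.0f %.0f %.0f\n", out[0], out[1], out[4], out[5]);
    return 0;
}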
- - - - - Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". - -dst[63:0] := a[63:0] - a[127:64] -dst[127:64] := b[63:0] - b[127:64] -dst[191:128] := a[191:128] - a[255:192] -dst[255:192] := b[191:128] - b[255:192] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". - -dst[31:0] := a[31:0] - a[63:32] -dst[63:32] := a[95:64] - a[127:96] -dst[95:64] := b[31:0] - b[63:32] -dst[127:96] := b[95:64] - b[127:96] -dst[159:128] := a[159:128] - a[191:160] -dst[191:160] := a[223:192] - a[255:224] -dst[223:192] := b[159:128] - b[191:160] -dst[255:224] := b[223:192] - b[255:224] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[i+63:i] * b[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[i+31:i] * b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[i+63:i] - b[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[i+31:i] - b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Arithmetic -
- - - - - Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[i+63:i] OR b[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[i+31:i] OR b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "ZF" value. - -IF ((a[255:0] AND b[255:0]) == 0) - ZF := 1 -ELSE - ZF := 0 -FI -IF (((NOT a[255:0]) AND b[255:0]) == 0) - CF := 1 -ELSE - CF := 0 -FI -RETURN ZF - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "CF" value. - -IF ((a[255:0] AND b[255:0]) == 0) - ZF := 1 -ELSE - ZF := 0 -FI -IF (((NOT a[255:0]) AND b[255:0]) == 0) - CF := 1 -ELSE - CF := 0 -FI -RETURN CF - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. - -IF ((a[255:0] AND b[255:0]) == 0) - ZF := 1 -ELSE - ZF := 0 -FI -IF (((NOT a[255:0]) AND b[255:0]) == 0) - CF := 1 -ELSE - CF := 0 -FI -IF (ZF == 0 && CF == 0) - dst := 1 -ELSE - dst := 0 -FI - - - AVX -
immintrin.h
- Logical -
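A common idiom built on these flag-setting tests is "is this vector all zero?": testing a register against itself makes the returned ZF reflect whether any bit is set. Sketch with the standard name _mm256_testz_si256:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i v = _mm256_setzero_si256();
    if (_mm256_testz_si256(v, v))        // (v AND v) == 0, so ZF = 1
        puts("all 256 bits are zero");
    return 0;
}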
- - - - - Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. - -tmp[255:0] := a[255:0] AND b[255:0] -IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[255:0] := (NOT a[255:0]) AND b[255:0] -IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) - CF := 1 -ELSE - CF := 0 -FI -dst := ZF - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. - -tmp[255:0] := a[255:0] AND b[255:0] -IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[255:0] := (NOT a[255:0]) AND b[255:0] -IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) - CF := 1 -ELSE - CF := 0 -FI -dst := CF - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. - -tmp[255:0] := a[255:0] AND b[255:0] -IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[255:0] := (NOT a[255:0]) AND b[255:0] -IF (tmp[63] == 0 && tmp[127] == 0 && tmp[191] == 0 && tmp[255] == 0) - CF := 1 -ELSE - CF := 0 -FI -IF (ZF == 0 && CF == 0) - dst := 1 -ELSE - dst := 0 -FI - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. - -tmp[127:0] := a[127:0] AND b[127:0] -IF (tmp[63] == 0 && tmp[127] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[127:0] := (NOT a[127:0]) AND b[127:0] -IF (tmp[63] == 0 && tmp[127] == 0) - CF := 1 -ELSE - CF := 0 -FI -dst := ZF - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. - -tmp[127:0] := a[127:0] AND b[127:0] -IF (tmp[63] == 0 && tmp[127] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[127:0] := (NOT a[127:0]) AND b[127:0] -IF (tmp[63] == 0 && tmp[127] == 0) - CF := 1 -ELSE - CF := 0 -FI -dst := CF - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. - -tmp[127:0] := a[127:0] AND b[127:0] -IF (tmp[63] == 0 && tmp[127] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[127:0] := (NOT a[127:0]) AND b[127:0] -IF (tmp[63] == 0 && tmp[127] == 0) - CF := 1 -ELSE - CF := 0 -FI -IF (ZF == 0 && CF == 0) - dst := 1 -ELSE - dst := 0 -FI - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. - -tmp[255:0] := a[255:0] AND b[255:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ - tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[255:0] := (NOT a[255:0]) AND b[255:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ - tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) - CF := 1 -ELSE - CF := 0 -FI -dst := ZF - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. - -tmp[255:0] := a[255:0] AND b[255:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ - tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[255:0] := (NOT a[255:0]) AND b[255:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ - tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) - CF := 1 -ELSE - CF := 0 -FI -dst := CF - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. - -tmp[255:0] := a[255:0] AND b[255:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ - tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[255:0] := (NOT a[255:0]) AND b[255:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0 && \ - tmp[159] == 0 && tmp[191] == 0 && tmp[223] == 0 && tmp[255] == 0) - CF := 1 -ELSE - CF := 0 -FI -IF (ZF == 0 && CF == 0) - dst := 1 -ELSE - dst := 0 -FI - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value. - -tmp[127:0] := a[127:0] AND b[127:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[127:0] := (NOT a[127:0]) AND b[127:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) - CF := 1 -ELSE - CF := 0 -FI -dst := ZF - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value. - -tmp[127:0] := a[127:0] AND b[127:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[127:0] := (NOT a[127:0]) AND b[127:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) - CF := 1 -ELSE - CF := 0 -FI -dst := CF - - - AVX -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. - -tmp[127:0] := a[127:0] AND b[127:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) - ZF := 1 -ELSE - ZF := 0 -FI -tmp[127:0] := (NOT a[127:0]) AND b[127:0] -IF (tmp[31] == 0 && tmp[63] == 0 && tmp[95] == 0 && tmp[127] == 0) - CF := 1 -ELSE - CF := 0 -FI -IF (ZF == 0 && CF == 0) - dst := 1 -ELSE - dst := 0 -FI - - - AVX -
immintrin.h
- Logical -
- - - - - - Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF imm8[j] - dst[i+63:i] := b[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF imm8[j] - dst[i+31:i] := b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF mask[i+63] - dst[i+63:i] := b[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF mask[i+31] - dst[i+31:i] := b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst". - -dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] -dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] -dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] -dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -dst[95:64] := SELECT4(b[127:0], imm8[5:4]) -dst[127:96] := SELECT4(b[127:0], imm8[7:6]) -dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -dst[223:192] := SELECT4(b[255:128], imm8[5:4]) -dst[255:224] := SELECT4(b[255:128], imm8[7:6]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
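Because one imm8 drives both 128-bit lanes, this shuffle is per-lane rather than a full 256-bit permute, and the two low selections always read "a" while the two high selections read "b". Sketch using the standard name _mm256_shuffle_ps and the _MM_SHUFFLE helper macro:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_set_ps(8, 7, 6, 5, 4, 3, 2, 1);        // low-to-high: 1..8
    __m256 b = _mm256_set_ps(80, 70, 60, 50, 40, 30, 20, 10);
    // _MM_SHUFFLE(3,2,1,0) picks {a0, a1, b2, b3} within each lane.
    float out[8];
    _mm256_storeu_ps(out, _mm256_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0)));
    printf("%.0f %.0f %.0f %.0f\n", out[0], out[1], out[2], out[3]);  // 1 2 30 40
    return 0;
}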
- - - - - Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". - -CASE imm8[0] OF -0: dst[127:0] := a[127:0] -1: dst[127:0] := a[255:128] -ESAC -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". - -CASE imm8[0] OF -0: dst[127:0] := a[127:0] -1: dst[127:0] := a[255:128] -ESAC -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Extract 128 bits (composed of integer data) from "a", selected with "imm8", and store the result in "dst". - -CASE imm8[0] OF -0: dst[127:0] := a[127:0] -1: dst[127:0] := a[255:128] -ESAC -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Extract a 32-bit integer from "a", selected with "index", and store the result in "dst". - -dst[31:0] := (a[255:0] >> (index[2:0] * 32))[31:0] - - AVX -
immintrin.h
- Swizzle -
- - - - - Extract a 64-bit integer from "a", selected with "index", and store the result in "dst". - -dst[63:0] := (a[255:0] >> (index[1:0] * 64))[63:0] - - AVX -
immintrin.h
- Swizzle -
- - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], b[1:0]) -dst[63:32] := SELECT4(a[127:0], b[33:32]) -dst[95:64] := SELECT4(a[127:0], b[65:64]) -dst[127:96] := SELECT4(a[127:0], b[97:96]) -dst[159:128] := SELECT4(a[255:128], b[129:128]) -dst[191:160] := SELECT4(a[255:128], b[161:160]) -dst[223:192] := SELECT4(a[255:128], b[193:192]) -dst[255:224] := SELECT4(a[255:128], b[225:224]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], b[1:0]) -dst[63:32] := SELECT4(a[127:0], b[33:32]) -dst[95:64] := SELECT4(a[127:0], b[65:64]) -dst[127:96] := SELECT4(a[127:0], b[97:96]) -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". - -IF (b[1] == 0) dst[63:0] := a[63:0]; FI -IF (b[1] == 1) dst[63:0] := a[127:64]; FI -IF (b[65] == 0) dst[127:64] := a[63:0]; FI -IF (b[65] == 1) dst[127:64] := a[127:64]; FI -IF (b[129] == 0) dst[191:128] := a[191:128]; FI -IF (b[129] == 1) dst[191:128] := a[255:192]; FI -IF (b[193] == 0) dst[255:192] := a[191:128]; FI -IF (b[193] == 1) dst[255:192] := a[255:192]; FI -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst". - -IF (b[1] == 0) dst[63:0] := a[63:0]; FI -IF (b[1] == 1) dst[63:0] := a[127:64]; FI -IF (b[65] == 0) dst[127:64] := a[63:0]; FI -IF (b[65] == 1) dst[127:64] := a[127:64]; FI -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". - -IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI -IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI -IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI -IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI -IF (imm8[2] == 0) dst[191:128] := a[191:128]; FI -IF (imm8[2] == 1) dst[191:128] := a[255:192]; FI -IF (imm8[3] == 0) dst[255:192] := a[191:128]; FI -IF (imm8[3] == 1) dst[255:192] := a[255:192]; FI -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst". - -IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI -IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI -IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI -IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Shuffle 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". - -DEFINE SELECT4(src1, src2, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src1[127:0] - 1: tmp[127:0] := src1[255:128] - 2: tmp[127:0] := src2[127:0] - 3: tmp[127:0] := src2[255:128] - ESAC - IF control[3] - tmp[127:0] := 0 - FI - RETURN tmp[127:0] -} -dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) -dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Shuffle 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". - -DEFINE SELECT4(src1, src2, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src1[127:0] - 1: tmp[127:0] := src1[255:128] - 2: tmp[127:0] := src2[127:0] - 3: tmp[127:0] := src2[255:128] - ESAC - IF control[3] - tmp[127:0] := 0 - FI - RETURN tmp[127:0] -} -dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) -dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Shuffle 128-bits (composed of integer data) selected by "imm8" from "a" and "b", and store the results in "dst". - -DEFINE SELECT4(src1, src2, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src1[127:0] - 1: tmp[127:0] := src1[255:128] - 2: tmp[127:0] := src2[127:0] - 3: tmp[127:0] := src2[255:128] - ESAC - IF control[3] - tmp[127:0] := 0 - FI - RETURN tmp[127:0] -} -dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) -dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
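Unlike the in-lane shuffles, this permute moves whole 128-bit halves and can therefore cross the lane boundary: each imm8 nibble selects one of the four source halves, and bit 3 of a nibble zeroes that half. Swapping the halves of a single register is the classic use (standard name _mm256_permute2f128_ps):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_set_ps(8, 7, 6, 5, 4, 3, 2, 1);   // low-to-high: 1..8
    // imm8 = 0x01: dst.low = a.high (nibble 1), dst.high = a.low (nibble 0).
    __m256 r = _mm256_permute2f128_ps(a, a, 0x01);
    float out[8];
    _mm256_storeu_ps(out, r);
    printf("%.0f %.0f\n", out[0], out[4]);   // 5 1
    return 0;
}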
- - - - - - Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". - -dst[255:0] := a[255:0] -CASE (imm8[0]) OF -0: dst[127:0] := b[127:0] -1: dst[255:128] := b[127:0] -ESAC -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". - -dst[255:0] := a[255:0] -CASE imm8[0] OF -0: dst[127:0] := b[127:0] -1: dst[255:128] := b[127:0] -ESAC -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", then insert 128 bits from "b" into "dst" at the location specified by "imm8". - -dst[255:0] := a[255:0] -CASE (imm8[0]) OF -0: dst[127:0] := b[127:0] -1: dst[255:128] := b[127:0] -ESAC -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", and insert the 8-bit integer "i" into "dst" at the location specified by "index". - -dst[255:0] := a[255:0] -sel := index[4:0]*8 -dst[sel+7:sel] := i[7:0] - - AVX -
immintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "index". - -dst[255:0] := a[255:0] -sel := index[3:0]*16 -dst[sel+15:sel] := i[15:0] - - AVX -
immintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", and insert the 32-bit integer "i" into "dst" at the location specified by "index". - -dst[255:0] := a[255:0] -sel := index[2:0]*32 -dst[sel+31:sel] := i[31:0] - - AVX -
immintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", and insert the 64-bit integer "i" into "dst" at the location specified by "index". - -dst[255:0] := a[255:0] -sel := index[1:0]*64 -dst[sel+63:sel] := i[63:0] - - AVX -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[63:0] - dst[127:64] := src2[63:0] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[31:0] - dst[63:32] := src2[31:0] - dst[95:64] := src1[63:32] - dst[127:96] := src2[63:32] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Swizzle -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Special Math Functions -
- - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Special Math Functions -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Special Math Functions -
- - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Special Math Functions -
- - - - - Round the packed double-precision (64-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed double-precision floating-point elements in "dst". - [round_note] - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ROUND(a[i+63:i], rounding) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Special Math Functions -
- - - - - Round the packed single-precision (32-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed single-precision floating-point elements in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ROUND(a[i+31:i], rounding) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Special Math Functions -
- - - - Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := FLOOR(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Special Math Functions -
- - - - Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := CEIL(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Special Math Functions -
- - - - Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := FLOOR(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Special Math Functions -
- - - - Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := CEIL(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Special Math Functions -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ( a[i+63:i] OP b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Compare -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ( a[i+63:i] OP b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Compare -
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ( a[i+31:i] OP b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Compare -
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ( a[i+31:i] OP b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Compare -
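The all-ones/all-zeros element masks these comparisons produce are usually fed into a blend or compressed to a scalar bitmask. For example, finding which elements of "a" fall below a threshold with the ordered, non-signaling less-than predicate (standard names _mm256_cmp_ps, _CMP_LT_OQ and _mm256_movemask_ps):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_set_ps(8, 7, 6, 5, 4, 3, 2, 1);   // low-to-high: 1..8
    __m256 b = _mm256_set1_ps(4.5f);
    __m256 m = _mm256_cmp_ps(a, b, _CMP_LT_OQ);   // per-element a < b
    int bits = _mm256_movemask_ps(m);             // one sign bit per element
    printf("mask = 0x%02X\n", bits);              // 0x0F: elements 1..4 are below 4.5
    return 0;
}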
- - - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -dst[63:0] := ( a[63:0] OP b[63:0] ) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Compare -
- - - - - - Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -dst[31:0] := ( a[31:0] OP b[31:0] ) ? 0xFFFFFFFF : 0 -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Compare -
- - - - Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - m := j*64 - dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Convert -
- - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Convert -
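A hedged sketch, assuming this lane-wise conversion is the one Rust exposes as `_mm256_cvtepi32_ps`; `i32x8_to_f32x8` is an illustrative wrapper name:

use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn i32x8_to_f32x8(xs: &[i32; 8]) -> [f32; 8] {
    // Unaligned load of eight i32 lanes, then lane-wise int-to-float convert.
    let v = _mm256_loadu_si256(xs.as_ptr() as *const __m256i);
    core::mem::transmute(_mm256_cvtepi32_ps(v))
}

fn main() {
    if is_x86_feature_detected!("avx") {
        let got = unsafe { i32x8_to_f32x8(&[0, 1, -2, 3, -4, 5, -6, 7]) };
        assert_eq!(got, [0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0]);
    }
}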
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) -ENDFOR -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 3 - i := 64*j - k := 32*j - dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) -ENDFOR -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) -ENDFOR -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Convert -
- - - - Copy the lower single-precision (32-bit) floating-point element of "a" to "dst". - -dst[31:0] := a[31:0] - - - AVX -
immintrin.h
- Convert -
- - - - Copy the lower double-precision (64-bit) floating-point element of "a" to "dst". - -dst[63:0] := a[63:0] - - - AVX -
immintrin.h
- Convert -
- - - - Copy the lower 32-bit integer in "a" to "dst". - -dst[31:0] := a[31:0] - - - AVX -
immintrin.h
- Convert -
- - - - Zero the contents of all XMM or YMM registers. - YMM0[MAX:0] := 0 -YMM1[MAX:0] := 0 -YMM2[MAX:0] := 0 -YMM3[MAX:0] := 0 -YMM4[MAX:0] := 0 -YMM5[MAX:0] := 0 -YMM6[MAX:0] := 0 -YMM7[MAX:0] := 0 -IF _64_BIT_MODE - YMM8[MAX:0] := 0 - YMM9[MAX:0] := 0 - YMM10[MAX:0] := 0 - YMM11[MAX:0] := 0 - YMM12[MAX:0] := 0 - YMM13[MAX:0] := 0 - YMM14[MAX:0] := 0 - YMM15[MAX:0] := 0 -FI - - - AVX -
immintrin.h
- General Support -
- - - - Zero the upper 128 bits of all YMM registers; the lower 128 bits of the registers are unmodified. - YMM0[MAX:128] := 0
YMM1[MAX:128] := 0
YMM2[MAX:128] := 0
YMM3[MAX:128] := 0
YMM4[MAX:128] := 0
YMM5[MAX:128] := 0
YMM6[MAX:128] := 0
YMM7[MAX:128] := 0
IF _64_BIT_MODE
	YMM8[MAX:128] := 0
	YMM9[MAX:128] := 0
	YMM10[MAX:128] := 0
	YMM11[MAX:128] := 0
	YMM12[MAX:128] := 0
	YMM13[MAX:128] := 0
	YMM14[MAX:128] := 0
	YMM15[MAX:128] := 0
FI
	 - - AVX - 
immintrin.h
- General Support -
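A sketch of the upper-zeroing operation, assuming it maps to Rust's `_mm256_zeroupper`; note that compilers normally insert vzeroupper automatically at AVX/SSE transition points, so explicit calls like this are rarely needed:

use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn sum4(xs: &[f64; 4]) -> f64 {
    let v = _mm256_loadu_pd(xs.as_ptr());
    let doubled: [f64; 4] = core::mem::transmute(_mm256_add_pd(v, v));
    let sum: f64 = doubled.iter().sum();
    // No 256-bit values are live past this point; clearing the upper YMM
    // halves avoids the AVX-to-legacy-SSE transition penalty in mixed code.
    _mm256_zeroupper();
    sum
}

fn main() {
    if is_x86_feature_detected!("avx") {
        assert_eq!(unsafe { sum4(&[1.0, 2.0, 3.0, 4.0]) }, 20.0);
    }
}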
- - - - Return vector of type __m256 with undefined elements. - AVX -
immintrin.h
- General Support -
- - - - Return vector of type __m256d with undefined elements. - AVX -
immintrin.h
- General Support -
- - - - Return vector of type __m256i with undefined elements. - AVX -
immintrin.h
- General Support -
- - - - Broadcast a single-precision (32-bit) floating-point element from memory to all elements of "dst". - -tmp[31:0] := MEM[mem_addr+31:mem_addr] -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := tmp[31:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - Swizzle - - - Broadcast a single-precision (32-bit) floating-point element from memory to all elements of "dst". - -tmp[31:0] := MEM[mem_addr+31:mem_addr] -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := tmp[31:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Load -
- - Swizzle - - - Broadcast a double-precision (64-bit) floating-point element from memory to all elements of "dst". - -tmp[63:0] := MEM[mem_addr+63:mem_addr] -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := tmp[63:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - Swizzle - - - Broadcast 128 bits from memory (composed of 4 packed single-precision (32-bit) floating-point elements) to all elements of "dst". - -tmp[127:0] := MEM[mem_addr+127:mem_addr] -dst[127:0] := tmp[127:0] -dst[255:128] := tmp[127:0] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - Swizzle - - - Broadcast 128 bits from memory (composed of 2 packed double-precision (64-bit) floating-point elements) to all elements of "dst". - -tmp[127:0] := MEM[mem_addr+127:mem_addr] -dst[127:0] := tmp[127:0] -dst[255:128] := tmp[127:0] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - - - Load 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into "dst". - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - - - Load 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into "dst". - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - - - Load 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - - - Load 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
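A hedged load/store round trip, assuming the unaligned 256-bit load above is `_mm256_loadu_ps` (its store counterpart appears further down in this listing); `scale_in_place` is an illustrative helper:

use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn scale_in_place(data: &mut [f32; 8], factor: f32) {
    // No alignment requirement: loadu/storeu accept any address.
    let v = _mm256_loadu_ps(data.as_ptr());
    let scaled = _mm256_mul_ps(v, _mm256_set1_ps(factor));
    _mm256_storeu_ps(data.as_mut_ptr(), scaled);
}

fn main() {
    if is_x86_feature_detected!("avx") {
        let mut data = [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        unsafe { scale_in_place(&mut data, 2.0) };
        assert_eq!(data[7], 16.0);
    }
}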
- - - - Load 256-bits of integer data from memory into "dst". - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - - - Load 256-bits of integer data from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). - -FOR j := 0 to 3 - i := j*64 - IF mask[i+63] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
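A sketch of the masked load, assuming it corresponds to Rust's `_mm256_maskload_pd`; lanes whose mask high bit is clear never touch memory and read back as zero, which is what makes a short tail buffer like this safe:

use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn load_first_three(p: *const f64) -> [f64; 4] {
    // High bit set in the first three 64-bit lanes only.
    let mask = _mm256_setr_epi64x(-1, -1, -1, 0);
    core::mem::transmute(_mm256_maskload_pd(p, mask))
}

fn main() {
    if is_x86_feature_detected!("avx") {
        // Only three doubles are valid; the fourth lane is masked off.
        let data = [10.0_f64, 20.0, 30.0];
        let got = unsafe { load_first_three(data.as_ptr()) };
        assert_eq!(got, [10.0, 20.0, 30.0, 0.0]);
    }
}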
- - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). - -FOR j := 0 to 1 - i := j*64 - IF mask[i+63] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Load -
- - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). - -FOR j := 0 to 7 - i := j*32 - IF mask[i+31] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set). - -FOR j := 0 to 3 - i := j*32 - IF mask[i+31] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX -
immintrin.h
- Load -
- - - - Load 256-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm256_loadu_si256" when the data crosses a cache line boundary. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Load -
- - - - - Load two 128-bit values (composed of 4 packed single-precision (32-bit) floating-point elements) from memory, and combine them into a 256-bit value in "dst". - "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. - -dst[127:0] := MEM[loaddr+127:loaddr] -dst[255:128] := MEM[hiaddr+127:hiaddr] -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Load -
- - - - - Load two 128-bit values (composed of 2 packed double-precision (64-bit) floating-point elements) from memory, and combine them into a 256-bit value in "dst". - "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. - -dst[127:0] := MEM[loaddr+127:loaddr] -dst[255:128] := MEM[hiaddr+127:hiaddr] -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Load -
- - - - - Load two 128-bit values (composed of integer data) from memory, and combine them into a 256-bit value in "dst". - "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. - -dst[127:0] := MEM[loaddr+127:loaddr] -dst[255:128] := MEM[hiaddr+127:hiaddr] -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Load -
- - - - - Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory. - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory. - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX -
immintrin.h
- Store -
- - - - - Store 256-bits of integer data from "a" into memory. - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX -
immintrin.h
- Store -
- - - - - Store 256-bits of integer data from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX -
immintrin.h
- Store -
- - - - - - Store packed double-precision (64-bit) floating-point elements from "a" into memory using "mask". - -FOR j := 0 to 3 - i := j*64 - IF mask[i+63] - MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] - FI -ENDFOR - - - AVX -
immintrin.h
- Store -
- - - - - - Store packed double-precision (64-bit) floating-point elements from "a" into memory using "mask". - -FOR j := 0 to 1 - i := j*64 - IF mask[i+63] - MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] - FI -ENDFOR - - - AVX -
immintrin.h
- Store -
- - - - - - Store packed single-precision (32-bit) floating-point elements from "a" into memory using "mask". - -FOR j := 0 to 7 - i := j*32 - IF mask[i+31] - MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] - FI -ENDFOR - - - AVX -
immintrin.h
- Store -
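The masked store is the mirror image; a hedged sketch assuming it is Rust's `_mm256_maskstore_ps`, where only lanes with the mask high bit set are written back:

use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn store_even_lanes(dst: *mut f32, src: &[f32; 8]) {
    let v = _mm256_loadu_ps(src.as_ptr());
    // High bit set in lanes 0, 2, 4, 6 only; odd lanes are left untouched.
    let mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
    _mm256_maskstore_ps(dst, mask, v);
}

fn main() {
    if is_x86_feature_detected!("avx") {
        let mut out = [0.0_f32; 8];
        let src = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        unsafe { store_even_lanes(out.as_mut_ptr(), &src) };
        assert_eq!(out, [1.0, 0.0, 3.0, 0.0, 5.0, 0.0, 7.0, 0.0]);
    }
}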
- - - - - - Store packed single-precision (32-bit) floating-point elements from "a" into memory using "mask". - -FOR j := 0 to 3 - i := j*32 - IF mask[i+31] - MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] - FI -ENDFOR - - - AVX -
immintrin.h
- Store -
- - - - - Store 256-bits of integer data from "a" into memory using a non-temporal memory hint. - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX -
immintrin.h
- Store -
- - - - - - Store the high and low 128-bit halves (each composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory at two different 128-bit locations. - "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. - 
MEM[loaddr+127:loaddr] := a[127:0]
MEM[hiaddr+127:hiaddr] := a[255:128]
	 - - AVX - 
immintrin.h
- Store -
- - - - - - Store the high and low 128-bit halves (each composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory at two different 128-bit locations. - "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. - 
MEM[loaddr+127:loaddr] := a[127:0]
MEM[hiaddr+127:hiaddr] := a[255:128]
	 - - AVX - 
immintrin.h
- Store -
- - - - - - Store the high and low 128-bit halves (each composed of integer data) from "a" into memory at two different 128-bit locations. - "hiaddr" and "loaddr" do not need to be aligned on any particular boundary. - 
MEM[loaddr+127:loaddr] := a[127:0]
MEM[hiaddr+127:hiaddr] := a[255:128]
	 - - AVX - 
immintrin.h
- Store -
- - - - Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". - -dst[31:0] := a[63:32] -dst[63:32] := a[63:32] -dst[95:64] := a[127:96] -dst[127:96] := a[127:96] -dst[159:128] := a[191:160] -dst[191:160] := a[191:160] -dst[223:192] := a[255:224] -dst[255:224] := a[255:224] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Move -
- - - - Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". - -dst[31:0] := a[31:0] -dst[63:32] := a[31:0] -dst[95:64] := a[95:64] -dst[127:96] := a[95:64] -dst[159:128] := a[159:128] -dst[191:160] := a[159:128] -dst[223:192] := a[223:192] -dst[255:224] := a[223:192] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Move -
- - - - Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst". - -dst[63:0] := a[63:0] -dst[127:64] := a[63:0] -dst[191:128] := a[191:128] -dst[255:192] := a[191:128] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Move -
- - - - Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := 1.0 / a[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Elementary Math Functions -
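Since the reciprocal square root is only an approximation, results must be compared against a tolerance, not exact values; a sketch assuming the entry is Rust's `_mm256_rsqrt_ps` and using the documented 1.5*2^-12 relative error bound:

use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn rsqrt8(xs: &[f32; 8]) -> [f32; 8] {
    core::mem::transmute(_mm256_rsqrt_ps(_mm256_loadu_ps(xs.as_ptr())))
}

fn main() {
    if is_x86_feature_detected!("avx") {
        let approx = unsafe { rsqrt8(&[4.0; 8]) };
        for r in approx {
            // 1/sqrt(4.0) == 0.5, within the documented 1.5*2^-12 relative error.
            assert!((r - 0.5).abs() <= 0.5 * 1.5 / 4096.0);
        }
    }
}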
- - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := SQRT(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := SQRT(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Elementary Math Functions -
- - - - Set each bit of mask "dst" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in "a". - -FOR j := 0 to 3 - i := j*64 - IF a[i+63] - dst[j] := 1 - ELSE - dst[j] := 0 - FI -ENDFOR -dst[MAX:4] := 0 - - - AVX -
immintrin.h
- Miscellaneous -
- - - - Set each bit of mask "dst" based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in "a". - -FOR j := 0 to 7 - i := j*32 - IF a[i+31] - dst[j] := 1 - ELSE - dst[j] := 0 - FI -ENDFOR -dst[MAX:8] := 0 - - - AVX -
immintrin.h
- Miscellaneous -
- - - - Return vector of type __m256d with all elements set to zero. - -dst[MAX:0] := 0 - - - AVX -
immintrin.h
- Set -
- - - - Return vector of type __m256 with all elements set to zero. - -dst[MAX:0] := 0 - - - AVX -
immintrin.h
- Set -
- - - - Return vector of type __m256i with all elements set to zero. - -dst[MAX:0] := 0 - - - AVX -
immintrin.h
- Set -
- - - - - - - Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values. - -dst[63:0] := e0 -dst[127:64] := e1 -dst[191:128] := e2 -dst[255:192] := e3 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - - - - - Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values. - -dst[31:0] := e0 -dst[63:32] := e1 -dst[95:64] := e2 -dst[127:96] := e3 -dst[159:128] := e4 -dst[191:160] := e5 -dst[223:192] := e6 -dst[255:224] := e7 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set packed 8-bit integers in "dst" with the supplied values. - -dst[7:0] := e0 -dst[15:8] := e1 -dst[23:16] := e2 -dst[31:24] := e3 -dst[39:32] := e4 -dst[47:40] := e5 -dst[55:48] := e6 -dst[63:56] := e7 -dst[71:64] := e8 -dst[79:72] := e9 -dst[87:80] := e10 -dst[95:88] := e11 -dst[103:96] := e12 -dst[111:104] := e13 -dst[119:112] := e14 -dst[127:120] := e15 -dst[135:128] := e16 -dst[143:136] := e17 -dst[151:144] := e18 -dst[159:152] := e19 -dst[167:160] := e20 -dst[175:168] := e21 -dst[183:176] := e22 -dst[191:184] := e23 -dst[199:192] := e24 -dst[207:200] := e25 -dst[215:208] := e26 -dst[223:216] := e27 -dst[231:224] := e28 -dst[239:232] := e29 -dst[247:240] := e30 -dst[255:248] := e31 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - Set packed 16-bit integers in "dst" with the supplied values. - -dst[15:0] := e0 -dst[31:16] := e1 -dst[47:32] := e2 -dst[63:48] := e3 -dst[79:64] := e4 -dst[95:80] := e5 -dst[111:96] := e6 -dst[127:112] := e7 -dst[143:128] := e8 -dst[159:144] := e9 -dst[175:160] := e10 -dst[191:176] := e11 -dst[207:192] := e12 -dst[223:208] := e13 -dst[239:224] := e14 -dst[255:240] := e15 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - - - - - Set packed 32-bit integers in "dst" with the supplied values. - -dst[31:0] := e0 -dst[63:32] := e1 -dst[95:64] := e2 -dst[127:96] := e3 -dst[159:128] := e4 -dst[191:160] := e5 -dst[223:192] := e6 -dst[255:224] := e7 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - Set packed 64-bit integers in "dst" with the supplied values. - -dst[63:0] := e0 -dst[127:64] := e1 -dst[191:128] := e2 -dst[255:192] := e3 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. - -dst[63:0] := e3 -dst[127:64] := e2 -dst[191:128] := e1 -dst[255:192] := e0 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - - - - - Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order. - -dst[31:0] := e7 -dst[63:32] := e6 -dst[95:64] := e5 -dst[127:96] := e4 -dst[159:128] := e3 -dst[191:160] := e2 -dst[223:192] := e1 -dst[255:224] := e0 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set packed 8-bit integers in "dst" with the supplied values in reverse order. - -dst[7:0] := e31 -dst[15:8] := e30 -dst[23:16] := e29 -dst[31:24] := e28 -dst[39:32] := e27 -dst[47:40] := e26 -dst[55:48] := e25 -dst[63:56] := e24 -dst[71:64] := e23 -dst[79:72] := e22 -dst[87:80] := e21 -dst[95:88] := e20 -dst[103:96] := e19 -dst[111:104] := e18 -dst[119:112] := e17 -dst[127:120] := e16 -dst[135:128] := e15 -dst[143:136] := e14 -dst[151:144] := e13 -dst[159:152] := e12 -dst[167:160] := e11 -dst[175:168] := e10 -dst[183:176] := e9 -dst[191:184] := e8 -dst[199:192] := e7 -dst[207:200] := e6 -dst[215:208] := e5 -dst[223:216] := e4 -dst[231:224] := e3 -dst[239:232] := e2 -dst[247:240] := e1 -dst[255:248] := e0 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - Set packed 16-bit integers in "dst" with the supplied values in reverse order. - -dst[15:0] := e15 -dst[31:16] := e14 -dst[47:32] := e13 -dst[63:48] := e12 -dst[79:64] := e11 -dst[95:80] := e10 -dst[111:96] := e9 -dst[127:112] := e8 -dst[143:128] := e7 -dst[159:144] := e6 -dst[175:160] := e5 -dst[191:176] := e4 -dst[207:192] := e3 -dst[223:208] := e2 -dst[239:224] := e1 -dst[255:240] := e0 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - - - - - Set packed 32-bit integers in "dst" with the supplied values in reverse order. - -dst[31:0] := e7 -dst[63:32] := e6 -dst[95:64] := e5 -dst[127:96] := e4 -dst[159:128] := e3 -dst[191:160] := e2 -dst[223:192] := e1 -dst[255:224] := e0 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - - - - Set packed 64-bit integers in "dst" with the supplied values in reverse order. - -dst[63:0] := e3 -dst[127:64] := e2 -dst[191:128] := e1 -dst[255:192] := e0 -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[31:0] -ENDFOR -dst[MAX:256] := 0 - - AVX -
immintrin.h
- Set -
- - - - Broadcast 8-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastb" instruction. - 
FOR j := 0 to 31
	i := j*8
	dst[i+7:i] := a[7:0]
ENDFOR
dst[MAX:256] := 0
	 - AVX - 
immintrin.h
- Set -
- - - - Broadcast 16-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastw" instruction. - 
FOR j := 0 to 15
	i := j*16
	dst[i+15:i] := a[15:0]
ENDFOR
dst[MAX:256] := 0
	 - AVX - 
immintrin.h
- Set -
- - - - Broadcast 32-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastd" instruction. - 
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := a[31:0]
ENDFOR
dst[MAX:256] := 0
	 - AVX - 
immintrin.h
- Set -
- - - - Broadcast 64-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastq" instruction. - 
FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := a[63:0]
ENDFOR
dst[MAX:256] := 0
	 - AVX - 
immintrin.h
- Set -
- - - - - Set packed __m256 vector "dst" with the supplied values. - -dst[127:0] := lo[127:0] -dst[255:128] := hi[127:0] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Set -
- - - - - Set packed __m256d vector "dst" with the supplied values. - -dst[127:0] := lo[127:0] -dst[255:128] := hi[127:0] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Set -
- - - - - Set packed __m256i vector "dst" with the supplied values. - -dst[127:0] := lo[127:0] -dst[255:128] := hi[127:0] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Set -
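A sketch of combining two 128-bit halves, assuming these entries are `_mm256_set_m128i` and the `_mm256_setr_m128i` family described next (the "r" variants take the halves in memory order instead):

use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn concat(lo: &[i32; 4], hi: &[i32; 4]) -> [i32; 8] {
    let l = _mm_loadu_si128(lo.as_ptr() as *const __m128i);
    let h = _mm_loadu_si128(hi.as_ptr() as *const __m128i);
    // Note the argument order: the high half comes first.
    core::mem::transmute(_mm256_set_m128i(h, l))
}

fn main() {
    if is_x86_feature_detected!("avx") {
        let got = unsafe { concat(&[0, 1, 2, 3], &[4, 5, 6, 7]) };
        assert_eq!(got, [0, 1, 2, 3, 4, 5, 6, 7]);
    }
}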
- - - - - Set packed __m256 vector "dst" with the supplied values. - -dst[127:0] := lo[127:0] -dst[255:128] := hi[127:0] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Set -
- - - - - Set packed __m256d vector "dst" with the supplied values. - -dst[127:0] := lo[127:0] -dst[255:128] := hi[127:0] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Set -
- - - - - Set packed __m256i vector "dst" with the supplied values. - -dst[127:0] := lo[127:0] -dst[255:128] := hi[127:0] -dst[MAX:256] := 0 - - - AVX -
immintrin.h
- Set -
- - - - Cast vector of type __m256d to type __m256. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m256 to type __m256d. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m256 to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m256d to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m256i to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m256i to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m256 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m256d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m256i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m128 to type __m256; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m128d to type __m256d; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m128i to type __m256i; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m128 to type __m256; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m128d to type __m256d; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
- - - - Cast vector of type __m128i to type __m256i; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX -
immintrin.h
- Cast -
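The zero-latency casts are bit-level reinterpretations, useful for crossing the float/integer domains without paying for a conversion; a hedged sketch assuming Rust's `_mm256_castsi256_ps` naming, clearing sign bits with a float-domain AND:

use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
unsafe fn abs8(xs: &[f32; 8]) -> [f32; 8] {
    let v = _mm256_loadu_ps(xs.as_ptr());
    // Build the sign-bit mask in the integer domain, then reinterpret it as
    // floats; the cast emits no instructions.
    let mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fff_ffff));
    core::mem::transmute(_mm256_and_ps(v, mask))
}

fn main() {
    if is_x86_feature_detected!("avx") {
        let got = unsafe { abs8(&[-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0]) };
        assert_eq!(got, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
    }
}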
- - - - - - - Extract an 8-bit integer from "a", selected with "index", and store the result in "dst". - -dst[7:0] := (a[255:0] >> (index[4:0] * 8))[7:0] - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Extract a 16-bit integer from "a", selected with "index", and store the result in "dst". - -dst[15:0] := (a[255:0] >> (index[3:0] * 16))[15:0] - - AVX2 -
immintrin.h
- Swizzle -
- - - - - - Blend packed 16-bit integers from "a" and "b" within 128-bit lanes using control mask "imm8", and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF imm8[j%8] - dst[i+15:i] := b[i+15:i] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - - Blend packed 32-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF imm8[j] - dst[i+31:i] := b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - - Blend packed 32-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF imm8[j] - dst[i+31:i] := b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - - Blend packed 8-bit integers from "a" and "b" using "mask", and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - IF mask[i+7] - dst[i+7:i] := b[i+7:i] - ELSE - dst[i+7:i] := a[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low packed 8-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := a[7:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low packed 8-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := a[7:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low packed 32-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := a[31:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low packed 32-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[31:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low packed 64-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low packed 64-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast 128 bits of integer data from "a" to all 128-bit lanes in "dst". - -dst[127:0] := a[127:0] -dst[255:128] := a[127:0] -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast 128 bits of integer data from "a" to all 128-bit lanes in "dst". - -dst[127:0] := a[127:0] -dst[255:128] := a[127:0] -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := a[31:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[31:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low packed 16-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := a[15:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Broadcast the low packed 16-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := a[15:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Extract 128 bits (composed of integer data) from "a", selected with "imm8", and store the result in "dst". - -CASE imm8[0] OF -0: dst[127:0] := a[127:0] -1: dst[127:0] := a[255:128] -ESAC -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", then insert 128 bits (composed of integer data) from "b" into "dst" at the location specified by "imm8". - -dst[255:0] := a[255:0] -CASE (imm8[0]) OF -0: dst[127:0] := b[127:0] -1: dst[255:128] := b[127:0] -ESAC -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
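A sketch pairing the 128-bit extract and insert above, assuming they map to Rust's `_mm256_extracti128_si256` and `_mm256_inserti128_si256`, where the lane is a const generic rather than a runtime imm8:

use std::arch::x86_64::*;

#[target_feature(enable = "avx2")]
unsafe fn swap_halves(xs: &[i32; 8]) -> [i32; 8] {
    let v = _mm256_loadu_si256(xs.as_ptr() as *const __m256i);
    let lo = _mm256_extracti128_si256::<0>(v);
    let hi = _mm256_extracti128_si256::<1>(v);
    // Rebuild with the two 128-bit halves exchanged.
    core::mem::transmute(_mm256_inserti128_si256::<1>(_mm256_castsi128_si256(hi), lo))
}

fn main() {
    if is_x86_feature_detected!("avx2") {
        let got = unsafe { swap_halves(&[0, 1, 2, 3, 4, 5, 6, 7]) };
        assert_eq!(got, [4, 5, 6, 7, 0, 1, 2, 3]);
    }
}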
- - - - - - Shuffle 128-bits (composed of integer data) selected by "imm8" from "a" and "b", and store the results in "dst". - -DEFINE SELECT4(src1, src2, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src1[127:0] - 1: tmp[127:0] := src1[255:128] - 2: tmp[127:0] := src2[127:0] - 3: tmp[127:0] := src2[255:128] - ESAC - IF control[3] - tmp[127:0] := 0 - FI - RETURN tmp[127:0] -} -dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0]) -dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - id := idx[i+2:i]*32 - dst[i+31:i] := a[id+31:id] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx". - -FOR j := 0 to 7 - i := j*32 - id := idx[i+2:i]*32 - dst[i+31:i] := a[id+31:id] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Shuffle 8-bit integers in "a" within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*8 - IF b[i+7] == 1 - dst[i+7:i] := 0 - ELSE - index[3:0] := b[i+3:i] - dst[i+7:i] := a[index*8+7:index*8] - FI - IF b[128+i+7] == 1 - dst[128+i+7:128+i] := 0 - ELSE - index[3:0] := b[128+i+3:128+i] - dst[128+i+7:128+i] := a[128+index*8+7:128+index*8] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
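The per-lane behaviour in the pseudocode above is the usual vpshufb pitfall: the shuffle never crosses the 128-bit boundary. A hedged sketch, assuming the entry is Rust's `_mm256_shuffle_epi8`, reversing bytes within each lane independently:

use std::arch::x86_64::*;

#[target_feature(enable = "avx2")]
unsafe fn reverse_within_lanes(xs: &[u8; 32]) -> [u8; 32] {
    let v = _mm256_loadu_si256(xs.as_ptr() as *const __m256i);
    // The same 16-byte reversal pattern applies independently to each lane.
    let idx = _mm256_setr_epi8(
        15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
        15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
    );
    core::mem::transmute(_mm256_shuffle_epi8(v, idx))
}

fn main() {
    if is_x86_feature_detected!("avx2") {
        let mut xs = [0u8; 32];
        for (i, x) in xs.iter_mut().enumerate() {
            *x = i as u8;
        }
        let got = unsafe { reverse_within_lanes(&xs) };
        assert_eq!(got[0], 15); // low lane reversed in place...
        assert_eq!(got[16], 31); // ...and the high lane independently
    }
}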
- - - - - Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst". - 
dst[63:0] := a[63:0]
dst[79:64] := (a >> (imm8[1:0] * 16))[79:64]
dst[95:80] := (a >> (imm8[3:2] * 16))[79:64]
dst[111:96] := (a >> (imm8[5:4] * 16))[79:64]
dst[127:112] := (a >> (imm8[7:6] * 16))[79:64]
dst[191:128] := a[191:128]
dst[207:192] := (a >> (imm8[1:0] * 16))[207:192]
dst[223:208] := (a >> (imm8[3:2] * 16))[207:192]
dst[239:224] := (a >> (imm8[5:4] * 16))[207:192]
dst[255:240] := (a >> (imm8[7:6] * 16))[207:192]
dst[MAX:256] := 0
	 - - AVX2 - 
immintrin.h
- Swizzle -
- - - - - Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst". - 
dst[15:0] := (a >> (imm8[1:0] * 16))[15:0]
dst[31:16] := (a >> (imm8[3:2] * 16))[15:0]
dst[47:32] := (a >> (imm8[5:4] * 16))[15:0]
dst[63:48] := (a >> (imm8[7:6] * 16))[15:0]
dst[127:64] := a[127:64]
dst[143:128] := (a >> (imm8[1:0] * 16))[143:128]
dst[159:144] := (a >> (imm8[3:2] * 16))[143:128]
dst[175:160] := (a >> (imm8[5:4] * 16))[143:128]
dst[191:176] := (a >> (imm8[7:6] * 16))[143:128]
dst[255:192] := a[255:192]
dst[MAX:256] := 0
	 - - AVX2 - 
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[71:64] - dst[15:8] := src2[71:64] - dst[23:16] := src1[79:72] - dst[31:24] := src2[79:72] - dst[39:32] := src1[87:80] - dst[47:40] := src2[87:80] - dst[55:48] := src1[95:88] - dst[63:56] := src2[95:88] - dst[71:64] := src1[103:96] - dst[79:72] := src2[103:96] - dst[87:80] := src1[111:104] - dst[95:88] := src2[111:104] - dst[103:96] := src1[119:112] - dst[111:104] := src2[119:112] - dst[119:112] := src1[127:120] - dst[127:120] := src2[127:120] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[79:64] - dst[31:16] := src2[79:64] - dst[47:32] := src1[95:80] - dst[63:48] := src2[95:80] - dst[79:64] := src1[111:96] - dst[95:80] := src2[111:96] - dst[111:96] := src1[127:112] - dst[127:112] := src2[127:112] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - dst[71:64] := src1[39:32] - dst[79:72] := src2[39:32] - dst[87:80] := src1[47:40] - dst[95:88] := src2[47:40] - dst[103:96] := src1[55:48] - dst[111:104] := src2[55:48] - dst[119:112] := src1[63:56] - dst[127:120] := src2[63:56] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - dst[79:64] := src1[47:32] - dst[95:80] := src2[47:32] - dst[111:96] := src1[63:48] - dst[127:112] := src2[63:48] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[31:0] - dst[63:32] := src2[31:0] - dst[95:64] := src1[63:32] - dst[127:96] := src2[63:32] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[63:0] - dst[127:64] := src2[63:0] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Swizzle -
- - - - Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := ABS(a[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := ABS(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ABS(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Special Math Functions -
- - - - - Add packed 8-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := a[i+7:i] + b[i+7:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Add packed 16-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := a[i+15:i] + b[i+15:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Add packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[i+31:i] + b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Add packed 64-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[i+63:i] + b[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Add packed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Add packed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". - -dst[15:0] := a[31:16] + a[15:0] -dst[31:16] := a[63:48] + a[47:32] -dst[47:32] := a[95:80] + a[79:64] -dst[63:48] := a[127:112] + a[111:96] -dst[79:64] := b[31:16] + b[15:0] -dst[95:80] := b[63:48] + b[47:32] -dst[111:96] := b[95:80] + b[79:64] -dst[127:112] := b[127:112] + b[111:96] -dst[143:128] := a[159:144] + a[143:128] -dst[159:144] := a[191:176] + a[175:160] -dst[175:160] := a[223:208] + a[207:192] -dst[191:176] := a[255:240] + a[239:224] -dst[207:192] := b[159:144] + b[143:128] -dst[223:208] := b[191:176] + b[175:160] -dst[239:224] := b[223:208] + b[207:192] -dst[255:240] := b[255:240] + b[239:224] -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". - -dst[31:0] := a[63:32] + a[31:0] -dst[63:32] := a[127:96] + a[95:64] -dst[95:64] := b[63:32] + b[31:0] -dst[127:96] := b[127:96] + b[95:64] -dst[159:128] := a[191:160] + a[159:128] -dst[191:160] := a[255:224] + a[223:192] -dst[223:192] := b[191:160] + b[159:128] -dst[255:224] := b[255:224] + b[223:192] -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". - -dst[15:0] := Saturate16(a[31:16] + a[15:0]) -dst[31:16] := Saturate16(a[63:48] + a[47:32]) -dst[47:32] := Saturate16(a[95:80] + a[79:64]) -dst[63:48] := Saturate16(a[127:112] + a[111:96]) -dst[79:64] := Saturate16(b[31:16] + b[15:0]) -dst[95:80] := Saturate16(b[63:48] + b[47:32]) -dst[111:96] := Saturate16(b[95:80] + b[79:64]) -dst[127:112] := Saturate16(b[127:112] + b[111:96]) -dst[143:128] := Saturate16(a[159:144] + a[143:128]) -dst[159:144] := Saturate16(a[191:176] + a[175:160]) -dst[175:160] := Saturate16(a[223:208] + a[207:192]) -dst[191:176] := Saturate16(a[255:240] + a[239:224]) -dst[207:192] := Saturate16(b[159:144] + b[143:128]) -dst[223:208] := Saturate16(b[191:176] + b[175:160]) -dst[239:224] := Saturate16(b[223:208] + b[207:192]) -dst[255:240] := Saturate16(b[255:240] + b[239:224]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". - -dst[15:0] := a[15:0] - a[31:16] -dst[31:16] := a[47:32] - a[63:48] -dst[47:32] := a[79:64] - a[95:80] -dst[63:48] := a[111:96] - a[127:112] -dst[79:64] := b[15:0] - b[31:16] -dst[95:80] := b[47:32] - b[63:48] -dst[111:96] := b[79:64] - b[95:80] -dst[127:112] := b[111:96] - b[127:112] -dst[143:128] := a[143:128] - a[159:144] -dst[159:144] := a[175:160] - a[191:176] -dst[175:160] := a[207:192] - a[223:208] -dst[191:176] := a[239:224] - a[255:240] -dst[207:192] := b[143:128] - b[159:144] -dst[223:208] := b[175:160] - b[191:176] -dst[239:224] := b[207:192] - b[223:208] -dst[255:240] := b[239:224] - b[255:240] -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". - -dst[31:0] := a[31:0] - a[63:32] -dst[63:32] := a[95:64] - a[127:96] -dst[95:64] := b[31:0] - b[63:32] -dst[127:96] := b[95:64] - b[127:96] -dst[159:128] := a[159:128] - a[191:160] -dst[191:160] := a[223:192] - a[255:224] -dst[223:192] := b[159:128] - b[191:160] -dst[255:224] := b[223:192] - b[255:224] -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". - -dst[15:0] := Saturate16(a[15:0] - a[31:16]) -dst[31:16] := Saturate16(a[47:32] - a[63:48]) -dst[47:32] := Saturate16(a[79:64] - a[95:80]) -dst[63:48] := Saturate16(a[111:96] - a[127:112]) -dst[79:64] := Saturate16(b[15:0] - b[31:16]) -dst[95:80] := Saturate16(b[47:32] - b[63:48]) -dst[111:96] := Saturate16(b[79:64] - b[95:80]) -dst[127:112] := Saturate16(b[111:96] - b[127:112]) -dst[143:128] := Saturate16(a[143:128] - a[159:144]) -dst[159:144] := Saturate16(a[175:160] - a[191:176]) -dst[175:160] := Saturate16(a[207:192] - a[223:208]) -dst[191:176] := Saturate16(a[239:224] - a[255:240]) -dst[207:192] := Saturate16(b[143:128] - b[159:144]) -dst[223:208] := Saturate16(b[175:160] - b[191:176]) -dst[239:224] := Saturate16(b[207:192] - b[223:208]) -dst[255:240] := Saturate16(b[239:224] - b[255:240]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
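The multiply-and-horizontally-add step above (vpmaddwd) is a common building block for 16-bit dot products; a sketch assuming Rust's `_mm256_madd_epi16`, with `dot_i16` as an illustrative wrapper:

use std::arch::x86_64::*;

#[target_feature(enable = "avx2")]
unsafe fn dot_i16(a: &[i16; 16], b: &[i16; 16]) -> i32 {
    let va = _mm256_loadu_si256(a.as_ptr() as *const __m256i);
    let vb = _mm256_loadu_si256(b.as_ptr() as *const __m256i);
    // Eight 32-bit lanes, each holding a[2j]*b[2j] + a[2j+1]*b[2j+1].
    let pairs: [i32; 8] = core::mem::transmute(_mm256_madd_epi16(va, vb));
    pairs.iter().sum()
}

fn main() {
    if is_x86_feature_detected!("avx2") {
        // 16 products of 1*2, summed: 32.
        assert_eq!(unsafe { dot_i16(&[1; 16], &[2; 16]) }, 32);
    }
}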
- - - - - Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[i+31:i] * b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". - -FOR j := 0 to 15 - i := j*16 - tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) - dst[i+15:i] := tmp[31:16] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". - -FOR j := 0 to 15 - i := j*16 - tmp[31:0] := a[i+15:i] * b[i+15:i] - dst[i+15:i] := tmp[31:16] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". - -FOR j := 0 to 15 - i := j*16 - tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 - dst[i+15:i] := tmp[16:1] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". - -FOR j := 0 to 15 - i := j*16 - tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) - dst[i+15:i] := tmp[15:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed signed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst". - -FOR j := 0 to 7 - i := j*32 - tmp[63:0] := a[i+31:i] * b[i+31:i] - dst[i+31:i] := tmp[31:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce four unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst". - -FOR j := 0 to 31 - i := j*8 - tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) -ENDFOR -FOR j := 0 to 3 - i := j*64 - dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \ - tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56] - dst[i+63:i+16] := 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
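A short C sketch of the SAD reduction above, assuming the entry is _mm256_sad_epu8 (name inferred); the four 64-bit lanes carry small partial sums that are cheap to combine:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

// L1 distance of two 32-byte blocks, assuming _mm256_sad_epu8
static uint64_t sad32(const uint8_t *x, const uint8_t *y) {
    __m256i a = _mm256_loadu_si256((const __m256i*)x);
    __m256i b = _mm256_loadu_si256((const __m256i*)y);
    __m256i s = _mm256_sad_epu8(a, b);   // four partial sums, one per 64-bit lane
    uint64_t lanes[4];
    _mm256_storeu_si256((__m256i*)lanes, s);
    return lanes[0] + lanes[1] + lanes[2] + lanes[3];
}

int main(void) {
    uint8_t x[32], y[32];
    for (int j = 0; j < 32; j++) { x[j] = (uint8_t)j; y[j] = (uint8_t)(j + 1); }
    printf("%llu\n", (unsigned long long)sad32(x, y));  // 32 differences of 1 -> 32
    return 0;
}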
- - - - - Negate packed signed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. - -FOR j := 0 to 31 - i := j*8 - IF b[i+7:i] < 0 - dst[i+7:i] := -(a[i+7:i]) - ELSE IF b[i+7:i] == 0 - dst[i+7:i] := 0 - ELSE - dst[i+7:i] := a[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 - 
immintrin.h
- Arithmetic -
- - - - - Negate packed signed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. - -FOR j := 0 to 15 - i := j*16 - IF b[i+15:i] < 0 - dst[i+15:i] := -(a[i+15:i]) - ELSE IF b[i+15:i] == 0 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 - 
immintrin.h
- Arithmetic -
- - - - - Negate packed signed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. - -FOR j := 0 to 7 - i := j*32 - IF b[i+31:i] < 0 - dst[i+31:i] := -(a[i+31:i]) - ELSE IF b[i+31:i] == 0 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 - 
immintrin.h
- Arithmetic -
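The three entries above describe conditional negation; a C sketch assuming the 32-bit variant is _mm256_sign_epi32 (name inferred):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi32(5);
    __m256i b = _mm256_setr_epi32(-1, 0, 2, -7, 1, -1, 0, 3);
    // Negate a where b < 0, zero it where b == 0, pass it through otherwise
    __m256i r = _mm256_sign_epi32(a, b);
    int out[8];
    _mm256_storeu_si256((__m256i*)out, r);
    for (int j = 0; j < 8; j++) printf("%d ", out[j]);  // -5 0 5 -5 5 -5 0 5
    printf("\n");
    return 0;
}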
- - - - - Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := a[i+7:i] - b[i+7:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := a[i+15:i] - b[i+15:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[i+31:i] - b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[i+63:i] - b[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Arithmetic -
- - - - - - Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". - -FOR j := 0 to 1 - i := j*128 - tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) - dst[i+127:i] := tmp[127:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Miscellaneous -
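Note that the concatenation above happens per 128-bit lane, not across the full 256 bits; a C sketch assuming the entry is _mm256_alignr_epi8 (name inferred):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint8_t lo[32], hi[32];
    for (int j = 0; j < 32; j++) { lo[j] = (uint8_t)j; hi[j] = (uint8_t)(j + 32); }
    __m256i a = _mm256_loadu_si256((const __m256i*)hi);
    __m256i b = _mm256_loadu_si256((const __m256i*)lo);
    // Per lane: bytes 4..15 of b's lane, then bytes 0..3 of a's lane
    __m256i r = _mm256_alignr_epi8(a, b, 4);
    uint8_t out[32];
    _mm256_storeu_si256((__m256i*)out, r);
    for (int j = 0; j < 32; j++) printf("%u ", out[j]);
    printf("\n");  // 4..15 32..35, then 20..31 48..51
    return 0;
}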
- - - - Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[j] := a[i+7] -ENDFOR - - - AVX2 -
immintrin.h
- Miscellaneous -
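A C sketch of the mask extraction above, assuming the entry is _mm256_movemask_epi8 (name inferred); one result bit per byte, taken from each byte's most significant bit:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    // High bit set in every even-indexed byte
    __m256i v = _mm256_setr_epi8(
        (char)0x80, 0, (char)0x80, 0, (char)0x80, 0, (char)0x80, 0,
        (char)0x80, 0, (char)0x80, 0, (char)0x80, 0, (char)0x80, 0,
        (char)0x80, 0, (char)0x80, 0, (char)0x80, 0, (char)0x80, 0,
        (char)0x80, 0, (char)0x80, 0, (char)0x80, 0, (char)0x80, 0);
    unsigned mask = (unsigned)_mm256_movemask_epi8(v);
    printf("0x%08x\n", mask);  // 0x55555555
    return 0;
}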
- - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". - Eight SADs are performed for each 128-bit lane using one quadruplet from "b" and eight quadruplets from "a". One quadruplet is selected from "b" starting at the offset specified in "imm8". Eight quadruplets are formed from sequential 8-bit integers selected from "a" starting at the offset specified in "imm8". - -DEFINE MPSADBW(a[127:0], b[127:0], imm8[2:0]) { - a_offset := imm8[2]*32 - b_offset := imm8[1:0]*32 - FOR j := 0 to 7 - i := j*8 - k := a_offset+i - l := b_offset - tmp[i*2+15:i*2] := ABS(Signed(a[k+7:k] - b[l+7:l])) + ABS(Signed(a[k+15:k+8] - b[l+15:l+8])) + \ - ABS(Signed(a[k+23:k+16] - b[l+23:l+16])) + ABS(Signed(a[k+31:k+24] - b[l+31:l+24])) - ENDFOR - RETURN tmp[127:0] -} -dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0]) -dst[255:128] := MPSADBW(a[255:128], b[255:128], imm8[5:3]) -dst[MAX:256] := 0 - - - AVX2 - 
immintrin.h
- Miscellaneous -
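A C sketch of the eight overlapping 4-byte SADs above, assuming the entry is _mm256_mpsadbw_epu8 (name inferred); this is the classic block-matching primitive, here comparing a block against itself:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint8_t ref[32];
    for (int j = 0; j < 32; j++) ref[j] = (uint8_t)(j * 3);
    __m256i a = _mm256_loadu_si256((const __m256i*)ref);
    __m256i b = a;
    // imm8 = 0: b quadruplet at offset 0 in each lane, a offsets 0..7
    __m256i sads = _mm256_mpsadbw_epu8(a, b, 0);
    uint16_t out[16];
    _mm256_storeu_si256((__m256i*)out, sads);
    // out[0] (and out[8] for the high lane) is 0: the block matches itself
    for (int j = 0; j < 16; j++) printf("%u ", out[j]);
    printf("\n");
    return 0;
}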
- - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". - -dst[7:0] := Saturate8(a[15:0]) -dst[15:8] := Saturate8(a[31:16]) -dst[23:16] := Saturate8(a[47:32]) -dst[31:24] := Saturate8(a[63:48]) -dst[39:32] := Saturate8(a[79:64]) -dst[47:40] := Saturate8(a[95:80]) -dst[55:48] := Saturate8(a[111:96]) -dst[63:56] := Saturate8(a[127:112]) -dst[71:64] := Saturate8(b[15:0]) -dst[79:72] := Saturate8(b[31:16]) -dst[87:80] := Saturate8(b[47:32]) -dst[95:88] := Saturate8(b[63:48]) -dst[103:96] := Saturate8(b[79:64]) -dst[111:104] := Saturate8(b[95:80]) -dst[119:112] := Saturate8(b[111:96]) -dst[127:120] := Saturate8(b[127:112]) -dst[135:128] := Saturate8(a[143:128]) -dst[143:136] := Saturate8(a[159:144]) -dst[151:144] := Saturate8(a[175:160]) -dst[159:152] := Saturate8(a[191:176]) -dst[167:160] := Saturate8(a[207:192]) -dst[175:168] := Saturate8(a[223:208]) -dst[183:176] := Saturate8(a[239:224]) -dst[191:184] := Saturate8(a[255:240]) -dst[199:192] := Saturate8(b[143:128]) -dst[207:200] := Saturate8(b[159:144]) -dst[215:208] := Saturate8(b[175:160]) -dst[223:216] := Saturate8(b[191:176]) -dst[231:224] := Saturate8(b[207:192]) -dst[239:232] := Saturate8(b[223:208]) -dst[247:240] := Saturate8(b[239:224]) -dst[255:248] := Saturate8(b[255:240]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Miscellaneous -
- - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". - -dst[15:0] := Saturate16(a[31:0]) -dst[31:16] := Saturate16(a[63:32]) -dst[47:32] := Saturate16(a[95:64]) -dst[63:48] := Saturate16(a[127:96]) -dst[79:64] := Saturate16(b[31:0]) -dst[95:80] := Saturate16(b[63:32]) -dst[111:96] := Saturate16(b[95:64]) -dst[127:112] := Saturate16(b[127:96]) -dst[143:128] := Saturate16(a[159:128]) -dst[159:144] := Saturate16(a[191:160]) -dst[175:160] := Saturate16(a[223:192]) -dst[191:176] := Saturate16(a[255:224]) -dst[207:192] := Saturate16(b[159:128]) -dst[223:208] := Saturate16(b[191:160]) -dst[239:224] := Saturate16(b[223:192]) -dst[255:240] := Saturate16(b[255:224]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Miscellaneous -
- - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". - -dst[7:0] := SaturateU8(a[15:0]) -dst[15:8] := SaturateU8(a[31:16]) -dst[23:16] := SaturateU8(a[47:32]) -dst[31:24] := SaturateU8(a[63:48]) -dst[39:32] := SaturateU8(a[79:64]) -dst[47:40] := SaturateU8(a[95:80]) -dst[55:48] := SaturateU8(a[111:96]) -dst[63:56] := SaturateU8(a[127:112]) -dst[71:64] := SaturateU8(b[15:0]) -dst[79:72] := SaturateU8(b[31:16]) -dst[87:80] := SaturateU8(b[47:32]) -dst[95:88] := SaturateU8(b[63:48]) -dst[103:96] := SaturateU8(b[79:64]) -dst[111:104] := SaturateU8(b[95:80]) -dst[119:112] := SaturateU8(b[111:96]) -dst[127:120] := SaturateU8(b[127:112]) -dst[135:128] := SaturateU8(a[143:128]) -dst[143:136] := SaturateU8(a[159:144]) -dst[151:144] := SaturateU8(a[175:160]) -dst[159:152] := SaturateU8(a[191:176]) -dst[167:160] := SaturateU8(a[207:192]) -dst[175:168] := SaturateU8(a[223:208]) -dst[183:176] := SaturateU8(a[239:224]) -dst[191:184] := SaturateU8(a[255:240]) -dst[199:192] := SaturateU8(b[143:128]) -dst[207:200] := SaturateU8(b[159:144]) -dst[215:208] := SaturateU8(b[175:160]) -dst[223:216] := SaturateU8(b[191:176]) -dst[231:224] := SaturateU8(b[207:192]) -dst[239:232] := SaturateU8(b[223:208]) -dst[247:240] := SaturateU8(b[239:224]) -dst[255:248] := SaturateU8(b[255:240]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Miscellaneous -
- - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst". - -dst[15:0] := SaturateU16(a[31:0]) -dst[31:16] := SaturateU16(a[63:32]) -dst[47:32] := SaturateU16(a[95:64]) -dst[63:48] := SaturateU16(a[127:96]) -dst[79:64] := SaturateU16(b[31:0]) -dst[95:80] := SaturateU16(b[63:32]) -dst[111:96] := SaturateU16(b[95:64]) -dst[127:112] := SaturateU16(b[127:96]) -dst[143:128] := SaturateU16(a[159:128]) -dst[159:144] := SaturateU16(a[191:160]) -dst[175:160] := SaturateU16(a[223:192]) -dst[191:176] := SaturateU16(a[255:224]) -dst[207:192] := SaturateU16(b[159:128]) -dst[223:208] := SaturateU16(b[191:160]) -dst[239:224] := SaturateU16(b[223:192]) -dst[255:240] := SaturateU16(b[255:224]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Miscellaneous -
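A C sketch of the saturating pack above, assuming the 32-to-16 unsigned-saturation entry is _mm256_packus_epi32 (name inferred); note the a/b interleaving per 128-bit half visible in the output:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi32(70000);  // above the 16-bit range
    __m256i b = _mm256_set1_epi32(-5);     // below the unsigned range
    __m256i p = _mm256_packus_epi32(a, b); // saturate to [0, 65535]
    unsigned short out[16];
    _mm256_storeu_si256((__m256i*)out, p);
    // Four 65535 from a, four 0 from b, then the same again for the high lane
    for (int j = 0; j < 16; j++) printf("%u ", out[j]);
    printf("\n");
    return 0;
}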
- - - - - Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[255:0] := (a[255:0] AND b[255:0]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Logical -
- - - - - Compute the bitwise NOT of 256 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". - -dst[255:0] := ((NOT a[255:0]) AND b[255:0]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of 256 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[255:0] := (a[255:0] OR b[255:0]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of 256 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[255:0] := (a[255:0] XOR b[255:0]) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Logical -
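The four bitwise entries above combine into the usual branch-free select; a C sketch assuming the names _mm256_and_si256, _mm256_andnot_si256 and _mm256_or_si256 (inferred):

#include <immintrin.h>
#include <stdio.h>

// Bitwise select: take bits of x where mask is 1, bits of y where mask is 0
static __m256i select256(__m256i mask, __m256i x, __m256i y) {
    return _mm256_or_si256(_mm256_and_si256(mask, x),
                           _mm256_andnot_si256(mask, y));
}

int main(void) {
    __m256i x = _mm256_set1_epi32(0x11111111);
    __m256i y = _mm256_set1_epi32(0x22222222);
    __m256i m = _mm256_set1_epi32(0x0000FFFF);
    int out[8];
    _mm256_storeu_si256((__m256i*)out, select256(m, x, y));
    printf("0x%08x\n", out[0]);  // 0x22221111
    return 0;
}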
- - - - - Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Probability/Statistics -
- - - - - Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Probability/Statistics -
- - - - - Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Compare -
- - - - - Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Compare -
- - - - - Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Compare -
- - - - - Compare packed 64-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ( a[i+63:i] == b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Compare -
- - - - - Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Compare -
- - - - - Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Compare -
- - - - - Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ( a[i+63:i] > b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Compare -
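The all-ones/all-zeros compare results above pair naturally with the byte movemask for searching; a memchr-style C sketch assuming _mm256_cmpeq_epi8 and _mm256_movemask_epi8 (names inferred; __builtin_ctz is a GCC/Clang builtin):

#include <immintrin.h>
#include <string.h>
#include <stdio.h>

// Index of the first occurrence of byte c in a 32-byte block, or -1
static int find_byte32(const unsigned char *p, unsigned char c) {
    __m256i data   = _mm256_loadu_si256((const __m256i*)p);
    __m256i needle = _mm256_set1_epi8((char)c);
    unsigned hits  = (unsigned)_mm256_movemask_epi8(_mm256_cmpeq_epi8(data, needle));
    return hits ? __builtin_ctz(hits) : -1;
}

int main(void) {
    unsigned char buf[32];
    memset(buf, 'a', sizeof buf);
    buf[13] = 'x';
    printf("%d\n", find_byte32(buf, 'x'));  // 13
    return 0;
}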
- - - - Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j:= 0 to 7 - i := 32*j - k := 16*j - dst[i+31:i] := SignExtend32(a[k+15:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j:= 0 to 3 - i := 64*j - k := 16*j - dst[i+63:i] := SignExtend64(a[k+15:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j:= 0 to 3 - i := 64*j - k := 32*j - dst[i+63:i] := SignExtend64(a[k+31:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". - -FOR j := 0 to 15 - i := j*8 - l := j*16 - dst[l+15:l] := SignExtend16(a[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - k := 8*j - dst[i+31:i] := SignExtend32(a[k+7:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := 64*j - k := 8*j - dst[i+63:i] := SignExtend64(a[k+7:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - k := 16*j - dst[i+31:i] := ZeroExtend32(a[k+15:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j:= 0 to 3 - i := 64*j - k := 16*j - dst[i+63:i] := ZeroExtend64(a[k+15:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j:= 0 to 3 - i := 64*j - k := 32*j - dst[i+63:i] := ZeroExtend64(a[k+31:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". - -FOR j := 0 to 15 - i := j*8 - l := j*16 - dst[l+15:l] := ZeroExtend16(a[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - k := 8*j - dst[i+31:i] := ZeroExtend32(a[k+7:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Convert -
- - - - Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := 64*j - k := 8*j - dst[i+63:i] := ZeroExtend64(a[k+7:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX2 - 
immintrin.h
- Convert -
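Each widening conversion above consumes a 128-bit source and produces a full 256-bit result; a C sketch assuming the 8-to-16-bit zero-extending entry is _mm256_cvtepu8_epi16 (name inferred):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint8_t src[16];
    for (int j = 0; j < 16; j++) src[j] = (uint8_t)(250 + j % 6);
    __m128i bytes = _mm_loadu_si128((const __m128i*)src);
    // Zero-extend 16 bytes into 16 words; values near 255 stay positive
    __m256i words = _mm256_cvtepu8_epi16(bytes);
    uint16_t out[16];
    _mm256_storeu_si256((__m256i*)out, words);
    for (int j = 0; j < 16; j++) printf("%u ", out[j]);  // 250..255, repeated
    printf("\n");
    return 0;
}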
- - - - - - Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
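A C sketch of the 8-element float gather above, assuming the entry is _mm256_i32gather_ps (name inferred); the scale is a compile-time constant, typically 4 for float data:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    float table[16];
    for (int j = 0; j < 16; j++) table[j] = (float)(j * 10);
    __m256i idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    // Gather every other table entry; scale 4 turns indices into byte offsets
    __m256 v = _mm256_i32gather_ps(table, idx, 4);
    float out[8];
    _mm256_storeu_ps(out, v);
    for (int j = 0; j < 8; j++) printf("%.0f ", out[j]);  // 0 20 40 ... 140
    printf("\n");
    return 0;
}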
- - - - - - Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*32 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:64] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*32 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:64] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*32 - IF mask[i+63] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -mask[MAX:128] := 0 -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*32 - IF mask[i+63] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -mask[MAX:256] := 0 -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*32 - IF mask[i+31] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -mask[MAX:128] := 0 -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*32 - IF mask[i+31] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -mask[MAX:256] := 0 -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*32 - IF mask[i+31] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -mask[MAX:128] := 0 -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*32 - IF mask[i+31] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -mask[MAX:256] := 0 -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*32 - IF mask[i+63] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -mask[MAX:128] := 0 -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*32 - IF mask[i+63] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -mask[MAX:256] := 0 -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*64 - IF mask[i+63] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -mask[MAX:128] := 0 -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*64 - IF mask[i+63] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -mask[MAX:256] := 0 -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*32 - m := j*64 - IF mask[i+31] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -mask[MAX:64] := 0 -dst[MAX:64] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*64 - IF mask[i+31] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -mask[MAX:128] := 0 -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*32 - m := j*64 - IF mask[i+31] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -mask[MAX:64] := 0 -dst[MAX:64] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*64 - IF mask[i+31] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -mask[MAX:128] := 0 -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*64 - IF mask[i+63] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -mask[MAX:128] := 0 -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - - - Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*64 - IF mask[i+63] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -mask[MAX:256] := 0 -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - Load packed 32-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). - -FOR j := 0 to 3 - i := j*32 - IF mask[i+31] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - Load packed 32-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). - -FOR j := 0 to 7 - i := j*32 - IF mask[i+31] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - Load packed 64-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). - -FOR j := 0 to 1 - i := j*64 - IF mask[i+63] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - Load packed 64-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element). - -FOR j := 0 to 3 - i := j*64 - IF mask[i+63] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - Load 256-bits of integer data from memory into "dst" using a non-temporal memory hint. - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Load -
- - - - - - Store packed 32-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). - -FOR j := 0 to 3 - i := j*32 - IF mask[i+31] - MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] - FI -ENDFOR - - - AVX2 -
immintrin.h
- Store -
- - - - - - Store packed 32-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). - -FOR j := 0 to 7 - i := j*32 - IF mask[i+31] - MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] - FI -ENDFOR - - - AVX2 -
immintrin.h
- Store -
- - - - - - Store packed 64-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). - -FOR j := 0 to 1 - i := j*64 - IF mask[i+63] - MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] - FI -ENDFOR - - - AVX2 -
immintrin.h
- Store -
- - - - - - Store packed 64-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element). - -FOR j := 0 to 3 - i := j*64 - IF mask[i+63] - MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] - FI -ENDFOR - - - AVX2 -
immintrin.h
- Store -
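The masked loads and stores above combine into the standard tail-handling pattern, touching only the first n lanes of memory; a C sketch assuming _mm256_maskload_epi32 and _mm256_maskstore_epi32 (names inferred):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    int src[5] = {1, 2, 3, 4, 5}, dst[5] = {0};
    // High bit set in the first five lanes only
    __m256i mask = _mm256_setr_epi32(-1, -1, -1, -1, -1, 0, 0, 0);
    __m256i v = _mm256_maskload_epi32(src, mask);  // lanes 5..7 zeroed, not read
    v = _mm256_add_epi32(v, _mm256_set1_epi32(10));
    _mm256_maskstore_epi32(dst, mask, v);          // lanes 5..7 not written
    for (int j = 0; j < 5; j++) printf("%d ", dst[j]);  // 11 12 13 14 15
    printf("\n");
    return 0;
}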
- - - - - Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". - -tmp := imm8[7:0] -IF tmp > 15 - tmp := 16 -FI -dst[127:0] := a[127:0] << (tmp*8) -dst[255:128] := a[255:128] << (tmp*8) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". - -tmp := imm8[7:0] -IF tmp > 15 - tmp := 16 -FI -dst[127:0] := a[127:0] << (tmp*8) -dst[255:128] := a[255:128] << (tmp*8) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
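Unlike the scalar-count shifts above it, the variable form gives every lane its own count, and out-of-range counts zero the lane; a C sketch assuming the 32-bit entry is _mm256_sllv_epi32 (name inferred):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i ones   = _mm256_set1_epi32(1);
    __m256i counts = _mm256_setr_epi32(0, 1, 2, 3, 4, 31, 32, 40);
    // Counts of 32 and 40 exceed the element width, so those lanes become 0
    __m256i r = _mm256_sllv_epi32(ones, counts);
    unsigned out[8];
    _mm256_storeu_si256((__m256i*)out, r);
    for (int j = 0; j < 8; j++) printf("%u ", out[j]);  // 1 2 4 8 16 2147483648 0 0
    printf("\n");
    return 0;
}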
- - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF count[i+31:i] < 32 - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF count[i+31:i] < 32 - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". - -tmp := imm8[7:0] -IF tmp > 15 - tmp := 16 -FI -dst[127:0] := a[127:0] >> (tmp*8) -dst[255:128] := a[255:128] >> (tmp*8) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". - -tmp := imm8[7:0] -IF tmp > 15 - tmp := 16 -FI -dst[127:0] := a[127:0] >> (tmp*8) -dst[255:128] := a[255:128] >> (tmp*8) -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX2 -
immintrin.h
- Shift -
- - - - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". - Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. - -FOR i := 0 to 1 - tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] - tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] - tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] - tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] -ENDFOR -FOR j := 0 to 3 - i := j*64 - dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ - ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) - - dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ - ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) - - dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ - ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) - - dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ - ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL - 
immintrin.h
- Miscellaneous -
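A C sketch of the double-block SAD above, assuming the entry is _mm256_dbsad_epu8 (name inferred; requires AVX512BW and AVX512VL, e.g. compile with -mavx512bw -mavx512vl):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint8_t x[32];
    for (int j = 0; j < 32; j++) x[j] = (uint8_t)j;
    __m256i a = _mm256_loadu_si256((const __m256i*)x);
    // imm8 = 0 selects b's dword 0 for all four quadruplet slots per lane
    __m256i s = _mm256_dbsad_epu8(a, a, 0);
    uint16_t out[16];
    _mm256_storeu_si256((__m256i*)out, s);
    // out[0] is 0 (dword 0 of a compared against itself); later slots differ
    for (int j = 0; j < 16; j++) printf("%u ", out[j]);
    printf("\n");
    return 0;
}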
- - - - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. - -FOR i := 0 to 1 - tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] - tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] - tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] - tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] -ENDFOR -FOR j := 0 to 3 - i := j*64 - tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ - ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) - - tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ - ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) - - tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ - ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) - - tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ - ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) -ENDFOR -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL - 
immintrin.h
- Miscellaneous -
- - - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. - -FOR i := 0 to 1 - tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] - tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] - tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] - tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] -ENDFOR -FOR j := 0 to 3 - i := j*64 - tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ - ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) - - tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ - ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) - - tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ - ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) - - tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ - ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) -ENDFOR -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL - 
immintrin.h
- Miscellaneous -
- - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". - Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. - -tmp.dword[0] := b.dword[ imm8[1:0] ] -tmp.dword[1] := b.dword[ imm8[3:2] ] -tmp.dword[2] := b.dword[ imm8[5:4] ] -tmp.dword[3] := b.dword[ imm8[7:6] ] -FOR j := 0 to 1 - i := j*64 - dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ - ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) - - dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ - ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) - - dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ - ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) - - dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ - ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL - 
immintrin.h
- Miscellaneous -
- - - - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. - -tmp.dword[0] := b.dword[ imm8[1:0] ] -tmp.dword[1] := b.dword[ imm8[3:2] ] -tmp.dword[2] := b.dword[ imm8[5:4] ] -tmp.dword[3] := b.dword[ imm8[7:6] ] -FOR j := 0 to 1 - i := j*64 - tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ - ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) - - tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ - ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) - - tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ - ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) - - tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ - ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) -ENDFOR -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL - 
immintrin.h
- Miscellaneous -
- - - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. - -tmp.dword[0] := b.dword[ imm8[1:0] ] -tmp.dword[1] := b.dword[ imm8[3:2] ] -tmp.dword[2] := b.dword[ imm8[5:4] ] -tmp.dword[3] := b.dword[ imm8[7:6] ] -FOR j := 0 to 1 - i := j*64 - tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ - ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) - - tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ - ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) - - tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ - ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) - - tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ - ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) -ENDFOR -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
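Editor's note: a minimal C sketch of the masked DBSAD form above (illustrative only, not part of the Intel definitions; assumes an AVX512BW+VL-capable CPU and a build with -mavx512bw -mavx512vl). With "a" all ones and "b" all zeros, every SAD is 4 regardless of the imm8 offsets, so the masked words are easy to check:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi8(1);     /* sixteen 0x01 bytes */
    __m128i b = _mm_setzero_si128();  /* sixteen 0x00 bytes */
    /* Each SAD sums four |1 - 0| differences, so every selected
       16-bit result is 4; mask 0x0F zeroes words 4-7. */
    __m128i r = _mm_maskz_dbsad_epu8((__mmask8)0x0F, a, b, 0);
    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d\n", out[0], out[7]); /* 4 0 */
    return 0;
}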
- - - - - - - - Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*128 - tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) - tmp_dst[i+127:i] := tmp[127:0] -ENDFOR -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*128 - tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) - tmp_dst[i+127:i] := tmp[127:0] -ENDFOR -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - - Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
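Editor's note: an illustrative C sketch of the masked byte-alignment entries above (same AVX512BW+VL build assumptions; the values are arbitrary):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi8(0x11);
    __m128i b = _mm_set1_epi8(0x22);
    /* Shift the 32-byte concatenation a:b right by 4 bytes and keep the
       low 16 bytes; zeromask 0x00FF then keeps only result bytes 0-7. */
    __m128i r = _mm_maskz_alignr_epi8((__mmask16)0x00FF, a, b, 4);
    unsigned char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%02x %02x\n", out[0], out[15]); /* 22 00 */
    return 0;
}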
- - - - - - Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := b[i+7:i] - ELSE - dst[i+7:i] := a[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := b[i+7:i] - ELSE - dst[i+7:i] := a[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := b[i+15:i] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := b[i+15:i] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
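Editor's note: a minimal C sketch of the mask-driven blend described above (illustrative only; AVX512BW+VL assumed):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi8(0);
    __m128i b = _mm_set1_epi8(1);
    /* Bit j of the mask selects element j from b, otherwise from a. */
    __m128i r = _mm_mask_blend_epi8((__mmask16)0xAAAA, a, b);
    signed char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d\n", out[0], out[1]); /* 0 1 */
    return 0;
}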
- - - - - - Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := a[7:0] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := a[7:0] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := a[7:0] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := a[7:0] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := a[15:0] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := a[15:0] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := a[15:0] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := a[15:0] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
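Editor's note: an illustrative C sketch of the masked broadcast entries above (AVX512BW+VL assumed; values are arbitrary):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi8(7, 1, 2, 3, 4, 5, 6, 7,
                              8, 9, 10, 11, 12, 13, 14, 15);
    /* Broadcast the low byte (7) to the lanes selected by the mask,
       zeroing the rest. */
    __m128i r = _mm_maskz_broadcastb_epi8((__mmask16)0x000F, a);
    signed char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d\n", out[3], out[4]); /* 7 0 */
    return 0;
}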
- - - - - - - Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - off := 16*idx[i+3:i] - dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] - ELSE - dst[i+15:i] := idx[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - off := 16*idx[i+3:i] - dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - off := 16*idx[i+3:i] - dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - off := 16*idx[i+3:i] - dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off] -ENDFOR -dst[MAX:256] := 0 - - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - off := 16*idx[i+2:i] - dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] - ELSE - dst[i+15:i] := idx[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - off := 16*idx[i+2:i] - dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - off := 16*idx[i+2:i] - dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - off := 16*idx[i+2:i] - dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off] -ENDFOR -dst[MAX:128] := 0 - - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
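Editor's note: a minimal C sketch of the two-source permute above (illustrative only; AVX512BW+VL assumed). In the 128-bit form, index bits [2:0] pick the element and bit 3 selects between "a" and "b":

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a   = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    __m128i b   = _mm_setr_epi16(100, 101, 102, 103, 104, 105, 106, 107);
    /* Indices 0-7 select from a, 8-15 from b; this interleaves the two. */
    __m128i idx = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
    __m128i r   = _mm_permutex2var_epi16(a, idx, b);
    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d %d\n", out[0], out[1], out[3]); /* 0 100 101 */
    return 0;
}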
- - - - - - - Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - id := idx[i+3:i]*16 - IF k[j] - dst[i+15:i] := a[id+15:id] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - id := idx[i+3:i]*16 - IF k[j] - dst[i+15:i] := a[id+15:id] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - id := idx[i+3:i]*16 - dst[i+15:i] := a[id+15:id] -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - id := idx[i+2:i]*16 - IF k[j] - dst[i+15:i] := a[id+15:id] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - id := idx[i+2:i]*16 - IF k[j] - dst[i+15:i] := a[id+15:id] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - id := idx[i+2:i]*16 - dst[i+15:i] := a[id+15:id] -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
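Editor's note: an illustrative C sketch of the single-source variable permute above (AVX512BW+VL assumed):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a   = _mm_setr_epi16(10, 11, 12, 13, 14, 15, 16, 17);
    __m128i idx = _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0); /* reverse */
    __m128i r   = _mm_permutexvar_epi16(idx, a);
    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d\n", out[0], out[7]); /* 17 10 */
    return 0;
}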
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a". - -FOR j := 0 to 31 - i := j*8 - IF a[i+7] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a". - -FOR j := 0 to 15 - i := j*8 - IF a[i+7] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := 0xFF - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := 0xFF - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := 0xFFFF - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := 0xFFFF - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a". - -FOR j := 0 to 15 - i := j*16 - IF a[i+15] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a". - -FOR j := 0 to 7 - i := j*16 - IF a[i+15] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
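Editor's note: a minimal C sketch of the vector-to-mask and mask-to-vector conversions above (illustrative only; AVX512BW+VL assumed):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi8(-1, 0, -1, 0, 0, 0, 0, 0,
                              0, 0, 0, 0, 0, 0, 0, 0);
    __mmask16 k = _mm_movepi8_mask(a);  /* sign bit of each byte -> mask bit */
    __m128i   v = _mm_movm_epi8(k);     /* mask bit -> 0xFF / 0x00 byte */
    unsigned char out[16];
    _mm_storeu_si128((__m128i *)out, v);
    printf("mask=0x%04x byte0=0x%02x\n", (unsigned)k, out[0]); /* 0x0005 0xff */
    return 0;
}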
- - - - - - - Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - IF b[i+7] == 1 - dst[i+7:i] := 0 - ELSE - index[4:0] := b[i+3:i] + (j & 0x10) - dst[i+7:i] := a[index*8+7:index*8] - FI - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - IF b[i+7] == 1 - dst[i+7:i] := 0 - ELSE - index[4:0] := b[i+3:i] + (j & 0x10) - dst[i+7:i] := a[index*8+7:index*8] - FI - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - - Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - IF b[i+7] == 1 - dst[i+7:i] := 0 - ELSE - index[3:0] := b[i+3:i] - dst[i+7:i] := a[index*8+7:index*8] - FI - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - IF b[i+7] == 1 - dst[i+7:i] := 0 - ELSE - index[3:0] := b[i+3:i] - dst[i+7:i] := a[index*8+7:index*8] - FI - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Swizzle -
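Editor's note: an illustrative C sketch of the masked byte shuffle above (AVX512BW+VL assumed). In the 128-bit form, the low 4 bits of each control byte index into "a" and a set bit 7 zeroes the lane:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a   = _mm_setr_epi8(10, 11, 12, 13, 14, 15, 16, 17,
                                18, 19, 20, 21, 22, 23, 24, 25);
    /* Control bytes 0-3 pick a[3]; byte 4 has bit 7 set, so that lane
       is zeroed even though its mask bit is set. */
    __m128i ctl = _mm_setr_epi8(3, 3, 3, 3, -128, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0);
    __m128i r = _mm_maskz_shuffle_epi8((__mmask16)0xFFFF, a, ctl);
    signed char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d\n", out[0], out[4]); /* 13 0 */
    return 0;
}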
- - - - - - - Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[63:0] := a[63:0] -tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] -tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] -tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] -tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] -tmp_dst[191:128] := a[191:128] -tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] -tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] -tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] -tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[63:0] := a[63:0] -tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] -tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] -tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] -tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] -tmp_dst[191:128] := a[191:128] -tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] -tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] -tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] -tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[63:0] := a[63:0] -tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] -tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] -tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] -tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[63:0] := a[63:0] -tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] -tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] -tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] -tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] -tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] -tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] -tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] -tmp_dst[127:64] := a[127:64] -tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] -tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] -tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] -tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] -tmp_dst[255:192] := a[255:192] -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] -tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] -tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] -tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] -tmp_dst[127:64] := a[127:64] -tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] -tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] -tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] -tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] -tmp_dst[255:192] := a[255:192] -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] -tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] -tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] -tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] -tmp_dst[127:64] := a[127:64] -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] -tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] -tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] -tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] -tmp_dst[127:64] := a[127:64] -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
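Editor's note: an illustrative C sketch of the masked shufflelo form above (AVX512BW+VL assumed). imm8 fields [1:0], [3:2], [5:4], [7:6] pick the source word for each of the four low result words:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    /* imm8 = 0x1B (fields 3,2,1,0) reverses the low four words; the high
       four words are copied from a. Mask 0xFF keeps every element. */
    __m128i r = _mm_maskz_shufflelo_epi16((__mmask8)0xFF, a, 0x1B);
    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d %d\n", out[0], out[3], out[4]); /* 3 0 4 */
    return 0;
}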
- - - - - - - Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[71:64] - dst[15:8] := src2[71:64] - dst[23:16] := src1[79:72] - dst[31:24] := src2[79:72] - dst[39:32] := src1[87:80] - dst[47:40] := src2[87:80] - dst[55:48] := src1[95:88] - dst[63:56] := src2[95:88] - dst[71:64] := src1[103:96] - dst[79:72] := src2[103:96] - dst[87:80] := src1[111:104] - dst[95:88] := src2[111:104] - dst[103:96] := src1[119:112] - dst[111:104] := src2[119:112] - dst[119:112] := src1[127:120] - dst[127:120] := src2[127:120] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[71:64] - dst[15:8] := src2[71:64] - dst[23:16] := src1[79:72] - dst[31:24] := src2[79:72] - dst[39:32] := src1[87:80] - dst[47:40] := src2[87:80] - dst[55:48] := src1[95:88] - dst[63:56] := src2[95:88] - dst[71:64] := src1[103:96] - dst[79:72] := src2[103:96] - dst[87:80] := src1[111:104] - dst[95:88] := src2[111:104] - dst[103:96] := src1[119:112] - dst[111:104] := src2[119:112] - dst[119:112] := src1[127:120] - dst[127:120] := src2[127:120] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[71:64] - dst[15:8] := src2[71:64] - dst[23:16] := src1[79:72] - dst[31:24] := src2[79:72] - dst[39:32] := src1[87:80] - dst[47:40] := src2[87:80] - dst[55:48] := src1[95:88] - dst[63:56] := src2[95:88] - dst[71:64] := src1[103:96] - dst[79:72] := src2[103:96] - dst[87:80] := src1[111:104] - dst[95:88] := src2[111:104] - dst[103:96] := src1[119:112] - dst[111:104] := src2[119:112] - dst[119:112] := src1[127:120] - dst[127:120] := src2[127:120] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[71:64] - dst[15:8] := src2[71:64] - dst[23:16] := src1[79:72] - dst[31:24] := src2[79:72] - dst[39:32] := src1[87:80] - dst[47:40] := src2[87:80] - dst[55:48] := src1[95:88] - dst[63:56] := src2[95:88] - dst[71:64] := src1[103:96] - dst[79:72] := src2[103:96] - dst[87:80] := src1[111:104] - dst[95:88] := src2[111:104] - dst[103:96] := src1[119:112] - dst[111:104] := src2[119:112] - dst[119:112] := src1[127:120] - dst[127:120] := src2[127:120] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[79:64] - dst[31:16] := src2[79:64] - dst[47:32] := src1[95:80] - dst[63:48] := src2[95:80] - dst[79:64] := src1[111:96] - dst[95:80] := src2[111:96] - dst[111:96] := src1[127:112] - dst[127:112] := src2[127:112] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[79:64] - dst[31:16] := src2[79:64] - dst[47:32] := src1[95:80] - dst[63:48] := src2[95:80] - dst[79:64] := src1[111:96] - dst[95:80] := src2[111:96] - dst[111:96] := src1[127:112] - dst[127:112] := src2[127:112] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[79:64] - dst[31:16] := src2[79:64] - dst[47:32] := src1[95:80] - dst[63:48] := src2[95:80] - dst[79:64] := src1[111:96] - dst[95:80] := src2[111:96] - dst[111:96] := src1[127:112] - dst[127:112] := src2[127:112] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[79:64] - dst[31:16] := src2[79:64] - dst[47:32] := src1[95:80] - dst[63:48] := src2[95:80] - dst[79:64] := src1[111:96] - dst[95:80] := src2[111:96] - dst[111:96] := src1[127:112] - dst[127:112] := src2[127:112] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - dst[71:64] := src1[39:32] - dst[79:72] := src2[39:32] - dst[87:80] := src1[47:40] - dst[95:88] := src2[47:40] - dst[103:96] := src1[55:48] - dst[111:104] := src2[55:48] - dst[119:112] := src1[63:56] - dst[127:120] := src2[63:56] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - dst[71:64] := src1[39:32] - dst[79:72] := src2[39:32] - dst[87:80] := src1[47:40] - dst[95:88] := src2[47:40] - dst[103:96] := src1[55:48] - dst[111:104] := src2[55:48] - dst[119:112] := src1[63:56] - dst[127:120] := src2[63:56] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - dst[71:64] := src1[39:32] - dst[79:72] := src2[39:32] - dst[87:80] := src1[47:40] - dst[95:88] := src2[47:40] - dst[103:96] := src1[55:48] - dst[111:104] := src2[55:48] - dst[119:112] := src1[63:56] - dst[127:120] := src2[63:56] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - dst[71:64] := src1[39:32] - dst[79:72] := src2[39:32] - dst[87:80] := src1[47:40] - dst[95:88] := src2[47:40] - dst[103:96] := src1[55:48] - dst[111:104] := src2[55:48] - dst[119:112] := src1[63:56] - dst[127:120] := src2[63:56] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - dst[79:64] := src1[47:32] - dst[95:80] := src2[47:32] - dst[111:96] := src1[63:48] - dst[127:112] := src2[63:48] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - dst[79:64] := src1[47:32] - dst[95:80] := src2[47:32] - dst[111:96] := src1[63:48] - dst[127:112] := src2[63:48] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - dst[79:64] := src1[47:32] - dst[95:80] := src2[47:32] - dst[111:96] := src1[63:48] - dst[127:112] := src2[63:48] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - dst[79:64] := src1[47:32] - dst[95:80] := src2[47:32] - dst[111:96] := src1[63:48] - dst[127:112] := src2[63:48] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Miscellaneous -
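Editor's note: a minimal C sketch of the masked unpack/interleave entries above (illustrative only; AVX512BW+VL assumed):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi8(1);
    __m128i b = _mm_set1_epi8(2);
    /* Interleave the low 8 bytes of a and b (1,2,1,2,...); the mask
       zeroes the upper half of the result. */
    __m128i r = _mm_maskz_unpacklo_epi8((__mmask16)0x00FF, a, b);
    signed char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d %d\n", out[0], out[1], out[8]); /* 1 2 0 */
    return 0;
}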
- - - - - - Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - Load 256-bits (composed of 16 packed 16-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - Load 256-bits (composed of 32 packed 8-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - Load 128-bits (composed of 8 packed 16-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[127:0] := MEM[mem_addr+127:mem_addr] -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
- - - - Load 128-bits (composed of 16 packed 8-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[127:0] := MEM[mem_addr+127:mem_addr] -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Load -
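Editor's note: an illustrative C sketch of the masked unaligned loads above (AVX512BW+VL assumed). Masked-off elements are not read, so no fault is taken for them:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    short buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    /* Load only the words whose mask bit is set; the rest are zeroed. */
    __m128i r = _mm_maskz_loadu_epi16((__mmask8)0x0F, buf);
    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d\n", out[3], out[4]); /* 4 0 */
    return 0;
}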
- - - - - - Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := a[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := a[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Move -
- - - - - - Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := a[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := a[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Move -
- - - - - - Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Move -
- - - - - - Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Move -
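Editor's note: a minimal C sketch of the masked register-to-register moves above (illustrative only; AVX512BW+VL assumed):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i src = _mm_set1_epi8(9);
    __m128i a   = _mm_set1_epi8(5);
    /* Merge: lanes with a set mask bit come from a, the rest keep src. */
    __m128i r = _mm_mask_mov_epi8(src, (__mmask16)0x0001, a);
    signed char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d\n", out[0], out[1]); /* 5 9 */
    return 0;
}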
- - - - - - Store packed 16-bit integers from "a" into memory using writemask "k". - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*16 - IF k[j] - MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i] - FI -ENDFOR - - - AVX512BW - AVX512VL -
immintrin.h
- Store -
- - - - - - Store packed 16-bit integers from "a" into memory using writemask "k". - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*16 - IF k[j] - MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i] - FI -ENDFOR - - - AVX512BW - AVX512VL -
immintrin.h
- Store -
- - - - - - Store packed 8-bit integers from "a" into memory using writemask "k". - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 31 - i := j*8 - IF k[j] - MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] - FI -ENDFOR - - - AVX512BW - AVX512VL -
immintrin.h
- Store -
- - - - - - Store packed 8-bit integers from "a" into memory using writemask "k". - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*8 - IF k[j] - MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] - FI -ENDFOR - - - AVX512BW - AVX512VL -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 16 packed 16-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX512BW - AVX512VL -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 32 packed 8-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX512BW - AVX512VL -
immintrin.h
- Store -
- - - - - Store 128-bits (composed of 8 packed 16-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+127:mem_addr] := a[127:0] - - - AVX512BW - AVX512VL -
immintrin.h
- Store -
- - - - - Store 128-bits (composed of 16 packed 8-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+127:mem_addr] := a[127:0] - - - AVX512BW - AVX512VL -
immintrin.h
- Store -
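Editor's note: an illustrative C sketch of the masked stores above (AVX512BW+VL assumed). Memory locations whose mask bit is clear are left untouched:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    short buf[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    __m128i a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
    /* Mask 0x05 stores only words 0 and 2. */
    _mm_mask_storeu_epi16(buf, (__mmask8)0x05, a);
    printf("%d %d %d\n", buf[0], buf[1], buf[2]); /* 1 0 3 */
    return 0;
}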
- - - - - - Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := ABS(a[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := ABS(a[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := ABS(a[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := ABS(a[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := ABS(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := ABS(a[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := ABS(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := ABS(a[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
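Editor's note: a minimal C sketch of the masked absolute-value entries above (illustrative only; AVX512BW+VL assumed):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi16(-3, 3, -7, 7, -1, 1, -2, 2);
    /* Absolute value of the selected words; unselected words are zeroed. */
    __m128i r = _mm_maskz_abs_epi16((__mmask8)0x0F, a);
    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d %d\n", out[0], out[3], out[4]); /* 3 7 0 */
    return 0;
}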
- - - - - - - Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] + b[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] + b[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] + b[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] + b[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
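Editor's note: an illustrative C sketch of the masked wrapping add above (AVX512BW+VL assumed):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i src = _mm_set1_epi8(0);
    __m128i a   = _mm_set1_epi8(10);
    __m128i b   = _mm_set1_epi8(20);
    /* a + b in the lanes selected by the mask; other lanes keep src. */
    __m128i r = _mm_mask_add_epi8(src, (__mmask16)0x0003, a, b);
    signed char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d\n", out[1], out[2]); /* 30 0 */
    return 0;
}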
- - - - - - - Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+7:i] := 0). The 128-bit variants iterate j := 0 to 15 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := a[i+15:i] + b[i+15:i]
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+7:i] := 0). The 128-bit variants iterate j := 0 to 15 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
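The +1 in the operation makes this a rounding average: ties round up rather than truncating. A small sketch, assuming the standard name _mm_maskz_avg_epu8 (the name is an assumption, not present in these records):

/* Sketch: assumes _mm_maskz_avg_epu8 (AVX512BW+VL). */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi8(10), b = _mm_set1_epi8(13);
    /* (10 + 13 + 1) >> 1 = 12: the true average 11.5 rounds up */
    __m128i r = _mm_maskz_avg_epu8((__mmask16)0xFFFF, a, b);
    unsigned char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%u\n", out[0]);                    /* 12 */
    return 0;
}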
Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
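The mixed signedness here is easy to get backwards: "a" supplies the unsigned factors, "b" the signed ones. A sketch assuming the standard name _mm_mask_maddubs_epi16 (an assumption; the name was stripped from this file):

/* Sketch: assumes _mm_mask_maddubs_epi16 (AVX512BW+VL).
   "a" is read as unsigned bytes, "b" as signed bytes. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi8((char)0xC8);     /* every byte 0xC8 = 200 unsigned */
    __m128i b = _mm_set1_epi8(-3);             /* every byte -3 signed           */
    __m128i src = _mm_set1_epi16(0);
    /* each word: 200*(-3) + 200*(-3) = -1200; Saturate16 does not clamp here */
    __m128i r = _mm_mask_maddubs_epi16(src, (__mmask8)0xFF, a, b);
    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d\n", out[0]);                    /* -1200 */
    return 0;
}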
Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+31:i] := 0). The 128-bit variants iterate j := 0 to 3 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
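Unlike the unsigned-by-signed form above, this one widens to 32 bits, so the pair sum cannot overflow and no saturation is involved. A sketch assuming the standard name _mm_mask_madd_epi16 (an assumption):

/* Sketch: assumes _mm_mask_madd_epi16 (AVX512BW+VL). */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi16(1000), b = _mm_set1_epi16(2000);
    __m128i src = _mm_set1_epi32(0);
    /* each dword: 1000*2000 + 1000*2000 = 4,000,000, well within int32 */
    __m128i r = _mm_mask_madd_epi16(src, (__mmask8)0x0F, a, b);
    int out[4];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d\n", out[0]);                    /* 4000000 */
    return 0;
}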
Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+7:i] := 0). The 128-bit variants iterate j := 0 to 15 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+7:i] := 0). The 128-bit variants iterate j := 0 to 15 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
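The signed and unsigned maximum records differ only in how the byte patterns are interpreted, which flips the result for values with the top bit set. A sketch assuming the standard name _mm_mask_max_epu8 (an assumption):

/* Sketch: assumes _mm_mask_max_epu8 (AVX512BW+VL). */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi8(-1);             /* 0xFF = 255 when read unsigned */
    __m128i b = _mm_set1_epi8(1);
    __m128i src = _mm_set1_epi8(0);
    /* unsigned compare: MAX(255, 1) = 255; the signed form would pick 1 */
    __m128i r = _mm_mask_max_epu8(src, (__mmask16)0xFFFF, a, b);
    unsigned char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%u\n", out[0]);                    /* 255 */
    return 0;
}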
Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+7:i] := 0). The 128-bit variants iterate j := 0 to 15 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+7:i] := 0). The 128-bit variants iterate j := 0 to 15 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1
        dst[i+15:i] := tmp[16:1]
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
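The shift-by-14, add-1, take-bits-[16:1] dance is exactly a Q15 fixed-point multiply with rounding. A worked sketch, assuming the standard name _mm_mask_mulhrs_epi16 (an assumption):

/* Sketch: assumes _mm_mask_mulhrs_epi16 (AVX512BW+VL).
   Classic use: Q15 fixed-point multiply with round-to-nearest. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi16(16384);         /* 0.5  in Q15 */
    __m128i b = _mm_set1_epi16(8192);          /* 0.25 in Q15 */
    __m128i src = _mm_set1_epi16(0);
    /* ((16384*8192) >> 14) + 1 = 8193; bits [16:1] give 4096 = 0.125 in Q15 */
    __m128i r = _mm_mask_mulhrs_epi16(src, (__mmask8)0xFF, a, b);
    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d\n", out[0]);                    /* 4096 */
    return 0;
}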
Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        tmp[31:0] := a[i+15:i] * b[i+15:i]
        dst[i+15:i] := tmp[31:16]
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
        dst[i+15:i] := tmp[31:16]
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
        dst[i+15:i] := tmp[15:0]
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
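The high-half and low-half records above are two views of the same 32-bit product, so combining them recovers it exactly. A sketch assuming the standard names _mm_mask_mulhi_epi16 and _mm_mask_mullo_epi16 (assumptions):

/* Sketch: assumes _mm_mask_mulhi_epi16 / _mm_mask_mullo_epi16 (AVX512BW+VL).
   High and low halves together reconstruct the full 32-bit product. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi16(300), b = _mm_set1_epi16(400);
    __m128i src = _mm_set1_epi16(0);
    __mmask8 k = 0xFF;
    __m128i hi = _mm_mask_mulhi_epi16(src, k, a, b);
    __m128i lo = _mm_mask_mullo_epi16(src, k, a, b);
    short h[8]; unsigned short l[8];
    _mm_storeu_si128((__m128i *)h, hi);
    _mm_storeu_si128((__m128i *)l, lo);
    printf("%d\n", ((int)h[0] << 16) | l[0]);  /* 120000 = 300*400 */
    return 0;
}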
Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := a[i+7:i] - b[i+7:i]
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+7:i] := 0). The 128-bit variants iterate j := 0 to 15 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+7:i] := 0). The 128-bit variants iterate j := 0 to 15 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+7:i] := 0). The 128-bit variants iterate j := 0 to 15 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
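Unsigned saturating subtraction clamps at zero on underflow, which is why it is a common building block for absolute-difference and thresholding kernels. A sketch assuming the standard name _mm_maskz_subs_epu8 (an assumption):

/* Sketch: assumes _mm_maskz_subs_epu8 (AVX512BW+VL). */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi8(10), b = _mm_set1_epi8(25);
    /* 10 - 25 underflows; SaturateU8 clamps the result to 0 */
    __m128i r = _mm_maskz_subs_epu8((__mmask16)0xFFFF, a, b);
    unsigned char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%u\n", out[0]);                    /* 0, not the wrapped 241 */
    return 0;
}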
Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := a[i+15:i] - b[i+15:i]
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Category: Arithmetic.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants iterate j := 0 to 7 and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

tmp_dst[15:0] := Saturate16(a[31:0])
tmp_dst[31:16] := Saturate16(a[63:32])
tmp_dst[47:32] := Saturate16(a[95:64])
tmp_dst[63:48] := Saturate16(a[127:96])
tmp_dst[79:64] := Saturate16(b[31:0])
tmp_dst[95:80] := Saturate16(b[63:32])
tmp_dst[111:96] := Saturate16(b[95:64])
tmp_dst[127:112] := Saturate16(b[127:96])
tmp_dst[143:128] := Saturate16(a[159:128])
tmp_dst[159:144] := Saturate16(a[191:160])
tmp_dst[175:160] := Saturate16(a[223:192])
tmp_dst[191:176] := Saturate16(a[255:224])
tmp_dst[207:192] := Saturate16(b[159:128])
tmp_dst[223:208] := Saturate16(b[191:160])
tmp_dst[239:224] := Saturate16(b[223:192])
tmp_dst[255:240] := Saturate16(b[255:224])
FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := tmp_dst[i+15:i]
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Categories: Miscellaneous, Convert.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+15:i] := 0). The 128-bit variants compute only tmp_dst[127:0] (the first eight assignments above, drawing from a[127:0] and b[127:0]), iterate j := 0 to 7, and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
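The tmp_dst pattern above is worth internalizing: within each 128-bit lane the result takes four words from "a" and then four from "b", rather than laying out all of "a" followed by all of "b". A sketch assuming the standard name _mm256_mask_packs_epi32 (an assumption):

/* Sketch: assumes _mm256_mask_packs_epi32 (AVX512BW+VL).
   Per 128-bit lane: four saturated words from a, then four from b. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi32(100000);     /* > 32767: clamps to 32767 */
    __m256i b = _mm256_set1_epi32(-7);
    __m256i src = _mm256_set1_epi16(0);
    __m256i r = _mm256_mask_packs_epi32(src, (__mmask16)0xFFFF, a, b);
    short out[16];
    _mm256_storeu_si256((__m256i *)out, r);
    printf("%d %d\n", out[0], out[4]);         /* 32767 -7 */
    return 0;
}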
Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

tmp_dst[7:0] := Saturate8(a[15:0])
tmp_dst[15:8] := Saturate8(a[31:16])
tmp_dst[23:16] := Saturate8(a[47:32])
tmp_dst[31:24] := Saturate8(a[63:48])
tmp_dst[39:32] := Saturate8(a[79:64])
tmp_dst[47:40] := Saturate8(a[95:80])
tmp_dst[55:48] := Saturate8(a[111:96])
tmp_dst[63:56] := Saturate8(a[127:112])
tmp_dst[71:64] := Saturate8(b[15:0])
tmp_dst[79:72] := Saturate8(b[31:16])
tmp_dst[87:80] := Saturate8(b[47:32])
tmp_dst[95:88] := Saturate8(b[63:48])
tmp_dst[103:96] := Saturate8(b[79:64])
tmp_dst[111:104] := Saturate8(b[95:80])
tmp_dst[119:112] := Saturate8(b[111:96])
tmp_dst[127:120] := Saturate8(b[127:112])
tmp_dst[135:128] := Saturate8(a[143:128])
tmp_dst[143:136] := Saturate8(a[159:144])
tmp_dst[151:144] := Saturate8(a[175:160])
tmp_dst[159:152] := Saturate8(a[191:176])
tmp_dst[167:160] := Saturate8(a[207:192])
tmp_dst[175:168] := Saturate8(a[223:208])
tmp_dst[183:176] := Saturate8(a[239:224])
tmp_dst[191:184] := Saturate8(a[255:240])
tmp_dst[199:192] := Saturate8(b[143:128])
tmp_dst[207:200] := Saturate8(b[159:144])
tmp_dst[215:208] := Saturate8(b[175:160])
tmp_dst[223:216] := Saturate8(b[191:176])
tmp_dst[231:224] := Saturate8(b[207:192])
tmp_dst[239:232] := Saturate8(b[223:208])
tmp_dst[247:240] := Saturate8(b[239:224])
tmp_dst[255:248] := Saturate8(b[255:240])
FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := tmp_dst[i+7:i]
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

CPUID Flags: AVX512BW, AVX512VL. Header: immintrin.h. Categories: Miscellaneous, Convert.

The zeromask variant is identical except that masked-off elements are zeroed out (ELSE: dst[i+7:i] := 0). The 128-bit variants compute only tmp_dst[127:0] (the first sixteen assignments above, drawing from a[127:0] and b[127:0]), iterate j := 0 to 15, and clear dst[MAX:128] := 0; writemask and zeromask forms exist at both widths.
Category: Miscellaneous / Convert. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

Operation (256-bit writemask form):
tmp_dst[15:0] := SaturateU16(a[31:0])
tmp_dst[31:16] := SaturateU16(a[63:32])
tmp_dst[47:32] := SaturateU16(a[95:64])
tmp_dst[63:48] := SaturateU16(a[127:96])
tmp_dst[79:64] := SaturateU16(b[31:0])
tmp_dst[95:80] := SaturateU16(b[63:32])
tmp_dst[111:96] := SaturateU16(b[95:64])
tmp_dst[127:112] := SaturateU16(b[127:96])
tmp_dst[143:128] := SaturateU16(a[159:128])
tmp_dst[159:144] := SaturateU16(a[191:160])
tmp_dst[175:160] := SaturateU16(a[223:192])
tmp_dst[191:176] := SaturateU16(a[255:224])
tmp_dst[207:192] := SaturateU16(b[159:128])
tmp_dst[223:208] := SaturateU16(b[191:160])
tmp_dst[239:224] := SaturateU16(b[223:192])
tmp_dst[255:240] := SaturateU16(b[255:224])
FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := tmp_dst[i+15:i]
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

Variants (same tmp_dst computation; only masking and width differ):
- 256-bit zeromask: the ELSE branch stores 0 instead of src[i+15:i].
- 128-bit writemask: only tmp_dst[127:0] (the first eight SaturateU16 lines, four from "a" then four from "b"); FOR j := 0 to 7; dst[MAX:128] := 0.
- 128-bit zeromask: as the 128-bit writemask form, with the ELSE branch storing 0.
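A short usage sketch of the 256-bit writemask form, assuming the conventional immintrin.h name _mm256_mask_packus_epi32 (the name is inferred; it is not in the entry itself).

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* -1 saturates to 0 and 70000 to 65535 under unsigned 16-bit saturation. */
    __m256i a   = _mm256_setr_epi32(-1, 70000, 3, 4, 5, 6, 7, 8);
    __m256i b   = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
    __m256i src = _mm256_set1_epi16(-1);        /* fallback for unselected lanes */

    /* Low 8 result lanes come from the pack; the high 8 keep src. */
    __m256i dst = _mm256_mask_packus_epi32(src, 0x00FF, a, b);

    unsigned short out[16];
    _mm256_storeu_si256((__m256i *)out, dst);
    for (int j = 0; j < 16; j++)
        printf("%u ", out[j]);  /* 0 65535 3 4 100 200 300 400, then eight 65535 */
    putchar('\n');
    return 0;
}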
Category: Miscellaneous / Convert. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

Operation (256-bit writemask form):
tmp_dst[7:0] := SaturateU8(a[15:0])
tmp_dst[15:8] := SaturateU8(a[31:16])
tmp_dst[23:16] := SaturateU8(a[47:32])
tmp_dst[31:24] := SaturateU8(a[63:48])
tmp_dst[39:32] := SaturateU8(a[79:64])
tmp_dst[47:40] := SaturateU8(a[95:80])
tmp_dst[55:48] := SaturateU8(a[111:96])
tmp_dst[63:56] := SaturateU8(a[127:112])
tmp_dst[71:64] := SaturateU8(b[15:0])
tmp_dst[79:72] := SaturateU8(b[31:16])
tmp_dst[87:80] := SaturateU8(b[47:32])
tmp_dst[95:88] := SaturateU8(b[63:48])
tmp_dst[103:96] := SaturateU8(b[79:64])
tmp_dst[111:104] := SaturateU8(b[95:80])
tmp_dst[119:112] := SaturateU8(b[111:96])
tmp_dst[127:120] := SaturateU8(b[127:112])
tmp_dst[135:128] := SaturateU8(a[143:128])
tmp_dst[143:136] := SaturateU8(a[159:144])
tmp_dst[151:144] := SaturateU8(a[175:160])
tmp_dst[159:152] := SaturateU8(a[191:176])
tmp_dst[167:160] := SaturateU8(a[207:192])
tmp_dst[175:168] := SaturateU8(a[223:208])
tmp_dst[183:176] := SaturateU8(a[239:224])
tmp_dst[191:184] := SaturateU8(a[255:240])
tmp_dst[199:192] := SaturateU8(b[143:128])
tmp_dst[207:200] := SaturateU8(b[159:144])
tmp_dst[215:208] := SaturateU8(b[175:160])
tmp_dst[223:216] := SaturateU8(b[191:176])
tmp_dst[231:224] := SaturateU8(b[207:192])
tmp_dst[239:232] := SaturateU8(b[223:208])
tmp_dst[247:240] := SaturateU8(b[239:224])
tmp_dst[255:248] := SaturateU8(b[255:240])
FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := tmp_dst[i+7:i]
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

Variants (same tmp_dst computation; only masking and width differ):
- 256-bit zeromask: the ELSE branch stores 0 instead of src[i+7:i].
- 128-bit forms use only tmp_dst[127:0] (the first sixteen SaturateU8 lines, eight from "a" then eight from "b"); FOR j := 0 to 15; dst[MAX:128] := 0; writemask and zeromask as above.
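A small sketch contrasting the unsigned saturation above with the signed saturation of the earlier pack entries; the names _mm_maskz_packus_epi16 and _mm_maskz_packs_epi16 are assumed from the usual immintrin.h naming.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi16(-1, 128, 255, 256, 0, 1, 2, 3);
    __m128i b = _mm_setzero_si128();

    /* Unsigned saturation clamps to [0, 255]: -1 -> 0, 256 -> 255. */
    __m128i u = _mm_maskz_packus_epi16(0xFFFF, a, b);
    /* Signed saturation clamps to [-128, 127]: 128 -> 127, 255 -> 127. */
    __m128i s = _mm_maskz_packs_epi16(0xFFFF, a, b);

    unsigned char uo[16]; signed char so[16];
    _mm_storeu_si128((__m128i *)uo, u);
    _mm_storeu_si128((__m128i *)so, s);
    printf("%u %u %u %u | %d %d %d %d\n",
           uo[0], uo[1], uo[2], uo[3], so[0], so[1], so[2], so[3]);
    /* prints: 0 128 255 255 | -1 127 127 127 */
    return 0;
}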
Category: Convert. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst".

Operation (16-element form, 256-bit source):
FOR j := 0 to 15
    i := 16*j
    l := 8*j
    dst[l+7:l] := Saturate8(a[i+15:i])
ENDFOR
dst[MAX:128] := 0

Variants:
- writemask "k": selected elements as above, the rest copied from "src"; zeromask "k": the rest zeroed.
- masked store (category Store): the active results (those with their respective bit set in writemask "k") are written to unaligned memory at "base_addr"; inactive bytes of memory are left untouched.
- 8-element form (128-bit source): FOR j := 0 to 7; dst[MAX:64] := 0; the same plain, writemask, zeromask, and masked-store variants.
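A sketch of the masked-store variant, assuming the conventional name _mm_mask_cvtsepi16_storeu_epi8 for the 8-element form (inferred, not named in the entry).

#include <immintrin.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    __m128i a = _mm_setr_epi16(1000, -1000, 3, 4, 5, 6, 7, 8);

    unsigned char buf[8];
    memset(buf, 0xAA, sizeof buf);              /* sentinel bytes */

    /* Store only elements 0..3; bytes 4..7 of buf keep the sentinel. */
    _mm_mask_cvtsepi16_storeu_epi8(buf, 0x0F, a);

    for (int j = 0; j < 8; j++)
        printf("%02x ", buf[j]);                /* 7f 80 03 04 aa aa aa aa */
    putchar('\n');
    return 0;
}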
Category: Convert. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

Operation (16-element form):
FOR j := 0 to 15
    i := j*8
    l := j*16
    IF k[j]
        dst[l+15:l] := SignExtend16(a[i+7:i])
    ELSE
        dst[l+15:l] := src[l+15:l]
    FI
ENDFOR
dst[MAX:256] := 0

Variants:
- zeromask "k": the ELSE branch stores 0.
- 8-element form: FOR j := 0 to 7; dst[MAX:128] := 0; writemask and zeromask as above.
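A sketch of the zeromask widening form, assuming the conventional name _mm256_maskz_cvtepi8_epi16.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi8(-1, -2, 3, 4, 5, 6, 7, 8,
                              9, 10, 11, 12, 13, 14, 15, 16);

    /* Sign-extend the low 4 bytes to 16 bits; zero the other 12 lanes. */
    __m256i w = _mm256_maskz_cvtepi8_epi16(0x000F, a);

    short out[16];
    _mm256_storeu_si256((__m256i *)out, w);
    for (int j = 0; j < 16; j++)
        printf("%d ", out[j]);                  /* -1 -2 3 4 0 0 ... 0 */
    putchar('\n');
    return 0;
}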
Category: Convert. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".

Operation (16-element form, 256-bit source):
FOR j := 0 to 15
    i := 16*j
    l := 8*j
    dst[l+7:l] := SaturateU8(a[i+15:i])
ENDFOR
dst[MAX:128] := 0

Variants:
- writemask "k": selected elements as above, the rest copied from "src"; zeromask "k": the rest zeroed.
- masked store (category Store): the active results (those with their respective bit set in writemask "k") are written to unaligned memory at "base_addr".
- 8-element form (128-bit source): FOR j := 0 to 7; dst[MAX:64] := 0; the same plain, writemask, zeromask, and masked-store variants.
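A sketch of the plain 16-element form, assuming the conventional name _mm256_cvtusepi16_epi8.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* As unsigned 16-bit lanes, 0xFFFF (65535) and 256 both saturate to 255. */
    __m256i a = _mm256_setr_epi16(65535, 256, 255, 7, 0, 1, 2, 3,
                                  4, 5, 6, 7, 8, 9, 10, 11);

    __m128i narrow = _mm256_cvtusepi16_epi8(a);

    unsigned char out[16];
    _mm_storeu_si128((__m128i *)out, narrow);
    for (int j = 0; j < 4; j++)
        printf("%u ", out[j]);                  /* 255 255 255 7 */
    putchar('\n');
    return 0;
}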
Category: Convert. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst".

Operation (16-element form, 256-bit source):
FOR j := 0 to 15
    i := 16*j
    l := 8*j
    dst[l+7:l] := Truncate8(a[i+15:i])
ENDFOR
dst[MAX:128] := 0

Variants:
- writemask "k": selected elements as above, the rest copied from "src"; zeromask "k": the rest zeroed.
- masked store (category Store): the active results (those with their respective bit set in writemask "k") are written to unaligned memory at "base_addr".
- 8-element form (128-bit source): FOR j := 0 to 7; dst[MAX:64] := 0; the same plain, writemask, zeromask, and masked-store variants.
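A sketch showing that truncation, unlike the saturating forms, simply drops the high byte; the name _mm_cvtepi16_epi8 is assumed from the usual immintrin.h naming.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* Truncation keeps only the low byte: 0x1234 -> 0x34, no saturation. */
    __m128i a = _mm_setr_epi16(0x1234, 0x00FF, -1, 7, 0, 0, 0, 0);

    __m128i t = _mm_cvtepi16_epi8(a);           /* low 64 bits hold the results */

    unsigned char out[16];
    _mm_storeu_si128((__m128i *)out, t);
    printf("%02x %02x %02x %02x\n", out[0], out[1], out[2], out[3]);
    /* prints: 34 ff ff 07 */
    return 0;
}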
Category: Convert. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

Operation (16-element form):
FOR j := 0 to 15
    i := j*8
    l := j*16
    IF k[j]
        dst[l+15:l] := ZeroExtend16(a[i+7:i])
    ELSE
        dst[l+15:l] := src[l+15:l]
    FI
ENDFOR
dst[MAX:256] := 0

Variants:
- zeromask "k": the ELSE branch stores 0.
- 8-element form: FOR j := 0 to 7; dst[MAX:128] := 0; writemask and zeromask as above.
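A sketch of the writemask form, assuming the conventional name _mm256_mask_cvtepu8_epi16.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a   = _mm_setr_epi8(-1, 2, 3, 4, 5, 6, 7, 8,
                                9, 10, 11, 12, 13, 14, 15, 16);
    __m256i src = _mm256_set1_epi16(-7);

    /* Zero-extension treats bytes as unsigned: 0xFF widens to 255, not -1. */
    __m256i w = _mm256_mask_cvtepu8_epi16(src, 0x0003, a);

    short out[16];
    _mm256_storeu_si256((__m256i *)out, w);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
    /* prints: 255 2 -7 -7 */
    return 0;
}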
Category: Set. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Broadcast 8-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

Operation (32-element form):
FOR j := 0 to 31
    i := j*8
    IF k[j]
        dst[i+7:i] := a[7:0]
    ELSE
        dst[i+7:i] := src[i+7:i]
    FI
ENDFOR
dst[MAX:256] := 0

Variants:
- zeromask "k": the ELSE branch stores 0.
- 16-element form: FOR j := 0 to 15; dst[MAX:128] := 0; writemask and zeromask as above.
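A sketch of the masked broadcast, assuming the conventional name _mm256_mask_set1_epi8.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i src = _mm256_setzero_si256();

    /* Broadcast 0x7F into every odd byte; even bytes keep src (zero). */
    __m256i v = _mm256_mask_set1_epi8(src, 0xAAAAAAAAu, 0x7F);

    unsigned char out[32];
    _mm256_storeu_si256((__m256i *)out, v);
    printf("%02x %02x %02x %02x\n", out[0], out[1], out[2], out[3]);
    /* prints: 00 7f 00 7f */
    return 0;
}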
Category: Set. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

Operation (16-element form):
FOR j := 0 to 15
    i := j*16
    IF k[j]
        dst[i+15:i] := a[15:0]
    ELSE
        dst[i+15:i] := src[i+15:i]
    FI
ENDFOR
dst[MAX:256] := 0

Variants:
- zeromask "k": the ELSE branch stores 0.
- 8-element form: FOR j := 0 to 7; dst[MAX:128] := 0; writemask and zeromask as above.
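A sketch of the 8-element writemask form, assuming the conventional name _mm_mask_broadcastw_epi16 (which takes the word to broadcast from the low lane of a vector).

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a   = _mm_setr_epi16(42, 1, 2, 3, 4, 5, 6, 7);
    __m128i src = _mm_set1_epi16(-1);

    /* Broadcast the low word of "a" (42) into lanes 0..3; lanes 4..7 keep src. */
    __m128i v = _mm_mask_broadcastw_epi16(src, 0x0F, a);

    short out[8];
    _mm_storeu_si128((__m128i *)out, v);
    for (int j = 0; j < 8; j++)
        printf("%d ", out[j]);                  /* 42 42 42 42 -1 -1 -1 -1 */
    putchar('\n');
    return 0;
}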
Category: Compare. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".

Operation (32-element form):
CASE (imm8[2:0]) OF
0: OP := _MM_CMPINT_EQ
1: OP := _MM_CMPINT_LT
2: OP := _MM_CMPINT_LE
3: OP := _MM_CMPINT_FALSE
4: OP := _MM_CMPINT_NE
5: OP := _MM_CMPINT_NLT
6: OP := _MM_CMPINT_NLE
7: OP := _MM_CMPINT_TRUE
ESAC
FOR j := 0 to 31
    i := j*8
    k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
ENDFOR
k[MAX:32] := 0

Fixed-predicate forms replace OP with a single comparison and are otherwise identical: equality (==), greater-than-or-equal (>=), greater-than (>), less-than-or-equal (<=), less-than (<), and not-equal (!=).

Zeromask forms take an additional mask "k1": k[j] is computed as above when k1[j] is set and zeroed otherwise (elements are zeroed out when the corresponding mask bit is not set). Both the imm8 form and all six fixed-predicate forms have zeromask variants.

16-element (128-bit) forms: FOR j := 0 to 15; k[MAX:16] := 0; the same imm8, fixed-predicate, and zeromask variants apply.
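A sketch of the imm8-driven compare against its fixed-predicate equivalent; the names _mm256_cmp_epi8_mask and _mm256_cmplt_epi8_mask are assumed from the usual immintrin.h naming.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_setr_epi8(
        -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28);
    __m256i b = _mm256_setzero_si256();

    /* Signed compare: bit j of the result mask is set where a[j] < 0. */
    __mmask32 lt = _mm256_cmp_epi8_mask(a, b, _MM_CMPINT_LT);
    printf("%08x\n", (unsigned)lt);             /* 00000007: lanes 0..2 */

    /* The fixed-predicate form computes the same mask. */
    __mmask32 lt2 = _mm256_cmplt_epi8_mask(a, b);
    printf("%s\n", lt == lt2 ? "equal" : "different");
    return 0;
}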
Category: Compare. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". The imm8 encoding uses the same _MM_CMPINT_* table as the signed 8-bit comparisons above; the per-element comparison is unsigned.

Operation (32-element form):
FOR j := 0 to 31
    i := j*8
    k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
ENDFOR
k[MAX:32] := 0

Fixed-predicate forms replace OP with ==, >=, >, <=, <, or !=. Zeromask forms take an additional mask "k1" and zero k[j] where k1[j] is not set; both the imm8 form and all six fixed-predicate forms have zeromask variants.

16-element (128-bit) forms: FOR j := 0 to 15; k[MAX:16] := 0; the same imm8, fixed-predicate, and zeromask variants apply.
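A sketch of the zeromask unsigned compare; the names _mm256_mask_cmple_epu8_mask and _mm256_mask_cmpge_epu8_mask are assumed from the usual immintrin.h naming.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* As unsigned bytes, 0xFF is 255, so it is NOT <= 8. */
    __m256i a = _mm256_set1_epi8(-1);           /* every byte 0xFF */
    __m256i b = _mm256_set1_epi8(8);

    __mmask32 k1 = 0x0000FFFFu;                 /* only consider lanes 0..15 */
    __mmask32 le = _mm256_mask_cmple_epu8_mask(k1, a, b);
    printf("%08x\n", (unsigned)le);             /* 00000000 */

    __mmask32 ge = _mm256_mask_cmpge_epu8_mask(k1, a, b);
    printf("%08x\n", (unsigned)ge);             /* 0000ffff */
    return 0;
}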
Category: Compare. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". The imm8 encoding uses the same _MM_CMPINT_* table as above; the per-element comparison is unsigned.

Operation (16-element form):
FOR j := 0 to 15
    i := j*16
    k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
ENDFOR
k[MAX:16] := 0

Fixed-predicate forms replace OP with ==, >=, >, <=, <, or !=. Zeromask forms take an additional mask "k1" and zero k[j] where k1[j] is not set; both the imm8 form and all six fixed-predicate forms have zeromask variants.

8-element (128-bit) forms: FOR j := 0 to 7; k[MAX:8] := 0; the same imm8, fixed-predicate, and zeromask variants apply.
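A sketch showing a typical use of the resulting mask vector: an unsigned clamp built from a compare plus a mask-driven blend. The names _mm256_cmpgt_epu16_mask and _mm256_mask_blend_epi16 are assumed from the usual immintrin.h naming.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* 50000 and 60000 would read as negative in a signed 16-bit compare. */
    __m256i a = _mm256_setr_epi16(1, 50000, 3, 60000, 5, 6, 7, 8,
                                  9, 10, 11, 12, 13, 14, 15, 16);
    __m256i limit = _mm256_set1_epi16(1000);

    /* Unsigned compare: bit j set where a[j] > 1000 as an unsigned value. */
    __mmask16 big = _mm256_cmpgt_epu16_mask(a, limit);

    /* Clamp: keep a where the bit is clear, take limit where it is set. */
    __m256i clamped = _mm256_mask_blend_epi16(big, a, limit);

    unsigned short out[16];
    _mm256_storeu_si256((__m256i *)out, clamped);
    printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]);
    /* prints: 1 1000 3 1000 */
    return 0;
}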
Category: Compare. CPUID: AVX512BW + AVX512VL. Header: immintrin.h.

Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". The imm8 encoding uses the same _MM_CMPINT_* table as above; the per-element comparison is signed.

Operation (16-element form):
FOR j := 0 to 15
    i := j*16
    k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
ENDFOR
k[MAX:16] := 0

Fixed-predicate forms replace OP with ==, >=, >, <=, <, or !=. Zeromask forms take an additional mask "k1" and zero k[j] where k1[j] is not set; both the imm8 form and all six fixed-predicate forms have zeromask variants.

8-element (128-bit) forms: FOR j := 0 to 7; k[MAX:8] := 0; with the imm8 form and the six fixed-predicate forms (==, >=, >, <=, <, !=).
immintrin.h
- Compare -
- - - - - - - Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 7 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
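The same CASE table drives every width, so one scalar model covers both the 8- and 16-lane variants above. A hypothetical test oracle (the helper name and signature are illustrative, not from the source):

#include <stdint.h>

/* Masked, predicate-encoded signed 16-bit compare over n lanes (8 or 16). */
uint32_t ref_mask_cmp_epi16(uint32_t k1, const int16_t *a,
                            const int16_t *b, int imm8, int n) {
    uint32_t k = 0;
    for (int j = 0; j < n; j++) {
        int r;
        switch (imm8 & 7) {
            case 0:  r = a[j] == b[j]; break; /* _MM_CMPINT_EQ    */
            case 1:  r = a[j] <  b[j]; break; /* _MM_CMPINT_LT    */
            case 2:  r = a[j] <= b[j]; break; /* _MM_CMPINT_LE    */
            case 3:  r = 0;            break; /* _MM_CMPINT_FALSE */
            case 4:  r = a[j] != b[j]; break; /* _MM_CMPINT_NE    */
            case 5:  r = a[j] >= b[j]; break; /* _MM_CMPINT_NLT   */
            case 6:  r = a[j] >  b[j]; break; /* _MM_CMPINT_NLE   */
            default: r = 1;            break; /* _MM_CMPINT_TRUE  */
        }
        if ((k1 >> j) & 1)
            k |= (uint32_t)r << j;            /* zeromask: else bit stays 0 */
    }
    return k;
}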
- - - - - - Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. - -FOR j := 0 to 31 - i := j*8 - IF k1[j] - k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. - -FOR j := 0 to 31 - i := j*8 - k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
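Testing a vector against itself is the common idiom here: a AND a == a, so the mask flags exactly the non-zero bytes. A sketch, assuming the intrinsic _mm256_test_epi8_mask (-mavx512bw -mavx512vl):

#include <immintrin.h>

/* Bit j of the result is set iff byte j of v is non-zero. */
__mmask32 nonzero_bytes(__m256i v) {
    return _mm256_test_epi8_mask(v, v);
}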
- - - - - - Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. - -FOR j := 0 to 15 - i := j*8 - IF k1[j] - k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. - -FOR j := 0 to 15 - i := j*8 - k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. - -FOR j := 0 to 15 - i := j*16 - IF k1[j] - k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. - -FOR j := 0 to 15 - i := j*16 - k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. - -FOR j := 0 to 7 - i := j*16 - IF k1[j] - k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. - -FOR j := 0 to 7 - i := j*16 - k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. - -FOR j := 0 to 31 - i := j*8 - IF k1[j] - k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. - -FOR j := 0 to 31 - i := j*8 - k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
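The NAND test inverts that idiom: against itself it flags the zero bytes, which is the building block of strlen-style scans. A sketch, assuming _mm256_testn_epi8_mask and the BMI1 intrinsic _tzcnt_u32:

#include <immintrin.h>

/* Index of the first zero byte in a 32-byte block, or -1 if none. */
int first_zero_byte(__m256i v) {
    __mmask32 z = _mm256_testn_epi8_mask(v, v); /* bit j set iff byte j == 0 */
    return z ? (int)_tzcnt_u32(z) : -1;
}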
- - - - - - Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. - -FOR j := 0 to 15 - i := j*8 - IF k1[j] - k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. - -FOR j := 0 to 15 - i := j*8 - k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. - -FOR j := 0 to 15 - i := j*16 - IF k1[j] - k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. - -FOR j := 0 to 15 - i := j*16 - k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. - -FOR j := 0 to 7 - i := j*16 - IF k1[j] - k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. - -FOR j := 0 to 7 - i := j*16 - k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Compare -
- - - - - - - Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
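Unlike the legacy uniform shifts, each lane here carries its own count, and any count of 16 or more zeroes the lane rather than being reduced modulo the element width. A sketch, assuming _mm256_sllv_epi16 and its zeromask form (-mavx512bw -mavx512vl):

#include <immintrin.h>

/* a[j] << counts[j] per 16-bit lane; counts >= 16 produce 0. */
__m256i scale_by_lane(__m256i a, __m256i counts) {
    return _mm256_sllv_epi16(a, counts);
}

/* Same, but lanes with a clear mask bit come out as 0. */
__m256i scale_active_lanes(__mmask16 k, __m256i a, __m256i counts) {
    return _mm256_maskz_sllv_epi16(k, a, counts);
}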
- - - - - - - Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
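The writemask variant merges instead of zeroing: lanes whose mask bit is clear keep the value from "src", so one instruction implements a conditional update. A sketch, assuming _mm256_mask_slli_epi16:

#include <immintrin.h>

/* Selected lanes become a << 1; the rest keep their value from src. */
__m256i double_selected(__m256i src, __mmask16 k, __m256i a) {
    return _mm256_mask_slli_epi16(src, k, a, 1);
}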
- - - - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF count[i+15:i] < 16 - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
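For the arithmetic shifts an out-of-range count does not zero the lane; it fills it with copies of the sign bit, i.e. 0 or 0xFFFF. A sketch, assuming _mm256_srav_epi16:

#include <immintrin.h>

/* a[j] >> counts[j] with sign fill. Note this rounds toward negative
 * infinity for negative inputs, unlike C integer division, which
 * rounds toward zero. */
__m256i shift_right_arith_per_lane(__m256i a, __m256i counts) {
    return _mm256_srav_epi16(a, counts);
}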
- - - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF count[i+15:i] < 16 - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
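Since every imm8 above 15 degenerates to a pure sign fill, shifting by exactly 15 already splats the sign bit across the whole lane. A sketch, assuming _mm256_maskz_srai_epi16:

#include <immintrin.h>

/* Active lanes become 0 (non-negative input) or 0xFFFF (negative input);
 * inactive lanes are zeroed. */
__m256i sign_splat_active(__mmask16 k, __m256i a) {
    return _mm256_maskz_srai_epi16(k, a, 15);
}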
- - - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
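The logical and arithmetic variable shifts differ only on lanes whose sign bit is set: one fills with zeros, the other with sign bits. A sketch contrasting the two, assuming _mm256_srlv_epi16 and _mm256_srav_epi16:

#include <immintrin.h>

void shift_right_both(__m256i a, __m256i n, __m256i *logical, __m256i *arith) {
    *logical = _mm256_srlv_epi16(a, n); /* fills with zeros     */
    *arith   = _mm256_srav_epi16(a, n); /* fills with sign bits */
}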
- - - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512BW - AVX512VL -
immintrin.h
- Shift -
- - - - Reduce the packed 16-bit integers in "a" by addition. Returns the sum of all elements in "a". 
-
-DEFINE REDUCE_ADD(src, len) {
-	IF len == 2
-		RETURN src[15:0] + src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] + src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_ADD(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_ADD(a, 8)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
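Because 16-bit addition wraps modulo 2^16, the tree reduction above agrees exactly with a left-to-right scalar sum. A sketch, assuming the sequence intrinsic _mm_reduce_add_epi16, which recent Clang/GCC immintrin.h provide under -mavx512bw -mavx512vl:

#include <immintrin.h>
#include <stdint.h>
#include <assert.h>

void check_reduce_add(const int16_t v[8]) {
    int16_t expect = 0;
    for (int j = 0; j < 8; j++)
        expect = (int16_t)(expect + v[j]); /* wraps mod 2^16, like the vector op */
    assert(_mm_reduce_add_epi16(_mm_loadu_si128((const __m128i *)v)) == expect);
}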
- - - - - Reduce the packed 16-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[15:0] + src[31:16] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := src[i+15:i] + src[i+16*len+15:i+16*len] - ENDFOR - RETURN REDUCE_ADD(src[16*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*16 - IF k[j] - tmp[i+15:i] := a[i+15:i] - ELSE - tmp[i+15:i] := 0 - FI -ENDFOR -dst[15:0] := REDUCE_ADD(tmp, 8) - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed 16-bit integers in "a" by addition. Returns the sum of all elements in "a". 
-
-DEFINE REDUCE_ADD(src, len) {
-	IF len == 2
-		RETURN src[15:0] + src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] + src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_ADD(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_ADD(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 16-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[15:0] + src[31:16] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := src[i+15:i] + src[i+16*len+15:i+16*len] - ENDFOR - RETURN REDUCE_ADD(src[16*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*16 - IF k[j] - tmp[i+15:i] := a[i+15:i] - ELSE - tmp[i+15:i] := 0 - FI -ENDFOR -dst[15:0] := REDUCE_ADD(tmp, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed 8-bit integers in "a" by addition. Returns the sum of all elements in "a". 
-
-DEFINE REDUCE_ADD(src, len) {
-	IF len == 2
-		RETURN src[7:0] + src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_ADD(src[8*len-1:0], len)
-}
-dst[7:0] := REDUCE_ADD(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 8-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[7:0] + src[15:8] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len] - ENDFOR - RETURN REDUCE_ADD(src[8*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*8 - IF k[j] - tmp[i+7:i] := a[i+7:i] - ELSE - tmp[i+7:i] := 0 - FI -ENDFOR -dst[7:0] := REDUCE_ADD(tmp, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed 8-bit integers in "a" by addition. Returns the sum of all elements in "a". 
-
-DEFINE REDUCE_ADD(src, len) {
-	IF len == 2
-		RETURN src[7:0] + src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_ADD(src[8*len-1:0], len)
-}
-dst[7:0] := REDUCE_ADD(a, 32)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 8-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[7:0] + src[15:8] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len] - ENDFOR - RETURN REDUCE_ADD(src[8*len-1:0], len) -} -tmp := a -FOR j := 0 to 31 - i := j*8 - IF k[j] - tmp[i+7:i] := a[i+7:i] - ELSE - tmp[i+7:i] := 0 - FI -ENDFOR -dst[7:0] := REDUCE_ADD(tmp, 32) - - AVX512BW - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed 16-bit integers in "a" by multiplication. Returns the product of all elements in "a". 
-
-DEFINE REDUCE_MUL(src, len) {
-	IF len == 2
-		RETURN src[15:0] * src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] * src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_MUL(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_MUL(a, 8)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 16-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a". 
-
-DEFINE REDUCE_MUL(src, len) {
-	IF len == 2
-		RETURN src[15:0] * src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] * src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_MUL(src[16*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 7
-	i := j*16
-	IF k[j]
-		tmp[i+15:i] := a[i+15:i]
-	ELSE
-		tmp[i+15:i] := 1
-	FI
-ENDFOR
-dst[15:0] := REDUCE_MUL(tmp, 8)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
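Inactive lanes are replaced with 1, the multiplicative identity, so they drop out of the product. A sketch, assuming _mm_mask_reduce_mul_epi16 (recent Clang/GCC immintrin.h, -mavx512bw -mavx512vl):

#include <immintrin.h>

/* Product of the lanes selected by k, wrapping modulo 2^16. */
short product_of_active(__mmask8 k, __m128i a) {
    return _mm_mask_reduce_mul_epi16(k, a);
}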
- - - - Reduce the packed 16-bit integers in "a" by multiplication. Returns the product of all elements in "a". 
-
-DEFINE REDUCE_MUL(src, len) {
-	IF len == 2
-		RETURN src[15:0] * src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] * src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_MUL(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_MUL(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 16-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a". 
-
-DEFINE REDUCE_MUL(src, len) {
-	IF len == 2
-		RETURN src[15:0] * src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] * src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_MUL(src[16*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 15
-	i := j*16
-	IF k[j]
-		tmp[i+15:i] := a[i+15:i]
-	ELSE
-		tmp[i+15:i] := 1
-	FI
-ENDFOR
-dst[15:0] := REDUCE_MUL(tmp, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - Reduce the packed 8-bit integers in "a" by multiplication. Returns the product of all elements in "a". 
-
-DEFINE REDUCE_MUL(src, len) {
-	IF len == 2
-		RETURN src[7:0] * src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] * src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_MUL(src[8*len-1:0], len)
-}
-dst[7:0] := REDUCE_MUL(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 8-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a". 
-
-DEFINE REDUCE_MUL(src, len) {
-	IF len == 2
-		RETURN src[7:0] * src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] * src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_MUL(src[8*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 15
-	i := j*8
-	IF k[j]
-		tmp[i+7:i] := a[i+7:i]
-	ELSE
-		tmp[i+7:i] := 1
-	FI
-ENDFOR
-dst[7:0] := REDUCE_MUL(tmp, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - Reduce the packed 8-bit integers in "a" by multiplication. Returns the product of all elements in "a". 
-
-DEFINE REDUCE_MUL(src, len) {
-	IF len == 2
-		RETURN src[7:0] * src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] * src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_MUL(src[8*len-1:0], len)
-}
-dst[7:0] := REDUCE_MUL(a, 32)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 8-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a". 
-
-DEFINE REDUCE_MUL(src, len) {
-	IF len == 2
-		RETURN src[7:0] * src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] * src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_MUL(src[8*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 31
-	i := j*8
-	IF k[j]
-		tmp[i+7:i] := a[i+7:i]
-	ELSE
-		tmp[i+7:i] := 1
-	FI
-ENDFOR
-dst[7:0] := REDUCE_MUL(tmp, 32)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - Reduce the packed 16-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a". 
-
-DEFINE REDUCE_OR(src, len) {
-	IF len == 2
-		RETURN src[15:0] OR src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] OR src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_OR(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_OR(a, 8)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 16-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a". 
-
-DEFINE REDUCE_OR(src, len) {
-	IF len == 2
-		RETURN src[15:0] OR src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] OR src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_OR(src[16*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 7
-	i := j*16
-	IF k[j]
-		tmp[i+15:i] := a[i+15:i]
-	ELSE
-		tmp[i+15:i] := 0
-	FI
-ENDFOR
-dst[15:0] := REDUCE_OR(tmp, 8)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
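Each masked reduction substitutes its operator's identity for inactive lanes: 0 here for OR, all-ones (0xFFFF / 0xFF) for the AND entries below. A sketch, assuming _mm_mask_reduce_or_epi16 (recent Clang/GCC immintrin.h, -mavx512bw -mavx512vl):

#include <immintrin.h>

/* Bitwise OR of the lanes selected by k; 0 if k is empty. */
short or_of_active(__mmask8 k, __m128i a) {
    return _mm_mask_reduce_or_epi16(k, a);
}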
- - - - Reduce the packed 16-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a". 
-
-DEFINE REDUCE_OR(src, len) {
-	IF len == 2
-		RETURN src[15:0] OR src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] OR src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_OR(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_OR(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 16-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a". 
-
-DEFINE REDUCE_OR(src, len) {
-	IF len == 2
-		RETURN src[15:0] OR src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] OR src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_OR(src[16*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 15
-	i := j*16
-	IF k[j]
-		tmp[i+15:i] := a[i+15:i]
-	ELSE
-		tmp[i+15:i] := 0
-	FI
-ENDFOR
-dst[15:0] := REDUCE_OR(tmp, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - Reduce the packed 8-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a". 
-
-DEFINE REDUCE_OR(src, len) {
-	IF len == 2
-		RETURN src[7:0] OR src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] OR src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_OR(src[8*len-1:0], len)
-}
-dst[7:0] := REDUCE_OR(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 8-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a". 
-
-DEFINE REDUCE_OR(src, len) {
-	IF len == 2
-		RETURN src[7:0] OR src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] OR src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_OR(src[8*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 15
-	i := j*8
-	IF k[j]
-		tmp[i+7:i] := a[i+7:i]
-	ELSE
-		tmp[i+7:i] := 0
-	FI
-ENDFOR
-dst[7:0] := REDUCE_OR(tmp, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - Reduce the packed 8-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a". 
-
-DEFINE REDUCE_OR(src, len) {
-	IF len == 2
-		RETURN src[7:0] OR src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] OR src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_OR(src[8*len-1:0], len)
-}
-dst[7:0] := REDUCE_OR(a, 32)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 8-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a". 
-
-DEFINE REDUCE_OR(src, len) {
-	IF len == 2
-		RETURN src[7:0] OR src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] OR src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_OR(src[8*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 31
-	i := j*8
-	IF k[j]
-		tmp[i+7:i] := a[i+7:i]
-	ELSE
-		tmp[i+7:i] := 0
-	FI
-ENDFOR
-dst[7:0] := REDUCE_OR(tmp, 32)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - Reduce the packed 16-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a". 
-
-DEFINE REDUCE_AND(src, len) {
-	IF len == 2
-		RETURN src[15:0] AND src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] AND src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_AND(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_AND(a, 8)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 16-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a". 
-
-DEFINE REDUCE_AND(src, len) {
-	IF len == 2
-		RETURN src[15:0] AND src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] AND src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_AND(src[16*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 7
-	i := j*16
-	IF k[j]
-		tmp[i+15:i] := a[i+15:i]
-	ELSE
-		tmp[i+15:i] := 0xFFFF
-	FI
-ENDFOR
-dst[15:0] := REDUCE_AND(tmp, 8)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - Reduce the packed 16-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a". 
-
-DEFINE REDUCE_AND(src, len) {
-	IF len == 2
-		RETURN src[15:0] AND src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] AND src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_AND(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_AND(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 16-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a". 
-
-DEFINE REDUCE_AND(src, len) {
-	IF len == 2
-		RETURN src[15:0] AND src[31:16]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := src[i+15:i] AND src[i+16*len+15:i+16*len]
-	ENDFOR
-	RETURN REDUCE_AND(src[16*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 15
-	i := j*16
-	IF k[j]
-		tmp[i+15:i] := a[i+15:i]
-	ELSE
-		tmp[i+15:i] := 0xFFFF
-	FI
-ENDFOR
-dst[15:0] := REDUCE_AND(tmp, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - Reduce the packed 8-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a". 
-
-DEFINE REDUCE_AND(src, len) {
-	IF len == 2
-		RETURN src[7:0] AND src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] AND src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_AND(src[8*len-1:0], len)
-}
-dst[7:0] := REDUCE_AND(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 8-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a". 
-
-DEFINE REDUCE_AND(src, len) {
-	IF len == 2
-		RETURN src[7:0] AND src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] AND src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_AND(src[8*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 15
-	i := j*8
-	IF k[j]
-		tmp[i+7:i] := a[i+7:i]
-	ELSE
-		tmp[i+7:i] := 0xFF
-	FI
-ENDFOR
-dst[7:0] := REDUCE_AND(tmp, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - Reduce the packed 8-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a". 
-
-DEFINE REDUCE_AND(src, len) {
-	IF len == 2
-		RETURN src[7:0] AND src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] AND src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_AND(src[8*len-1:0], len)
-}
-dst[7:0] := REDUCE_AND(a, 32)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 8-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a". 
-
-DEFINE REDUCE_AND(src, len) {
-	IF len == 2
-		RETURN src[7:0] AND src[15:8]
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := src[i+7:i] AND src[i+8*len+7:i+8*len]
-	ENDFOR
-	RETURN REDUCE_AND(src[8*len-1:0], len)
-}
-tmp := a
-FOR j := 0 to 31
-	i := j*8
-	IF k[j]
-		tmp[i+7:i] := a[i+7:i]
-	ELSE
-		tmp[i+7:i] := 0xFF
-	FI
-ENDFOR
-dst[7:0] := REDUCE_AND(tmp, 32)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Arithmetic -
- - - - Reduce the packed signed 16-bit integers in "a" by maximum. Returns the maximum of all elements in "a". 
-
-DEFINE REDUCE_MAX(src, len) {
-	IF len == 2
-		RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
-	ENDFOR
-	RETURN REDUCE_MAX(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_MAX(a, 8)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MAX(src[16*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*16 - IF k[j] - tmp[i+15:i] := a[i+15:i] - ELSE - tmp[i+15:i] := Int16(-0x8000) - FI -ENDFOR -dst[15:0] := REDUCE_MAX(tmp, 8) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
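The identity for signed max is the smallest representable value, hence the Int16(-0x8000) substituted above for inactive lanes. A sketch, assuming _mm_mask_reduce_max_epi16 (recent Clang/GCC immintrin.h, -mavx512bw -mavx512vl):

#include <immintrin.h>

/* Maximum of the lanes selected by k; INT16_MIN if k is empty. */
short max_of_active(__mmask8 k, __m128i a) {
    return _mm_mask_reduce_max_epi16(k, a);
}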
- - - - Reduce the packed signed 16-bit integers in "a" by maximum. Returns the maximum of all elements in "a". 
-
-DEFINE REDUCE_MAX(src, len) {
-	IF len == 2
-		RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
-	ENDFOR
-	RETURN REDUCE_MAX(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_MAX(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MAX(src[16*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*16 - IF k[j] - tmp[i+15:i] := a[i+15:i] - ELSE - tmp[i+15:i] := Int16(-0x8000) - FI -ENDFOR -dst[15:0] := REDUCE_MAX(tmp, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed signed 8-bit integers in "a" by maximum. Returns the maximum of all elements in "a". 
-
-DEFINE REDUCE_MAX(src, len) {
-	IF len == 2
-		RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
-	ENDFOR
-	RETURN REDUCE_MAX(src[8*len-1:0], len)
-}
-dst[7:0] := REDUCE_MAX(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MAX(src[8*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*8 - IF k[j] - tmp[i+7:i] := a[i+7:i] - ELSE - tmp[i+7:i] := Int8(-0x80) - FI -ENDFOR -dst[7:0] := REDUCE_MAX(tmp, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed signed 8-bit integers in "a" by maximum. Returns the maximum of all elements in "a". 
-
-DEFINE REDUCE_MAX(src, len) {
-	IF len == 2
-		RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*8
-		src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
-	ENDFOR
-	RETURN REDUCE_MAX(src[8*len-1:0], len)
-}
-dst[7:0] := REDUCE_MAX(a, 32)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MAX(src[8*len-1:0], len) -} -tmp := a -FOR j := 0 to 31 - i := j*8 - IF k[j] - tmp[i+7:i] := a[i+7:i] - ELSE - tmp[i+7:i] := Int8(-0x80) - FI -ENDFOR -dst[7:0] := REDUCE_MAX(tmp, 32) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 16-bit integers in "a" by maximum. Returns the maximum of all elements in "a". 
-
-DEFINE REDUCE_MAX(src, len) {
-	IF len == 2
-		RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
-	ENDFOR
-	RETURN REDUCE_MAX(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_MAX(a, 8)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MAX(src[16*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*16 - IF k[j] - tmp[i+15:i] := a[i+15:i] - ELSE - tmp[i+15:i] := 0 - FI -ENDFOR -dst[15:0] := REDUCE_MAX(tmp, 8) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 16-bit integers in "a" by maximum. Returns the maximum of all elements in "a". 
-
-DEFINE REDUCE_MAX(src, len) {
-	IF len == 2
-		RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
-	FI
-	len := len / 2
-	FOR j:= 0 to (len-1)
-		i := j*16
-		src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
-	ENDFOR
-	RETURN REDUCE_MAX(src[16*len-1:0], len)
-}
-dst[15:0] := REDUCE_MAX(a, 16)
-
-	AVX512BW
-	AVX512VL
-
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 16-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MAX(src[16*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*16 - IF k[j] - tmp[i+15:i] := a[i+15:i] - ELSE - tmp[i+15:i] := 0 - FI -ENDFOR -dst[15:0] := REDUCE_MAX(tmp, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 8-bit integers in "a" by maximum. Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MAX(src[8*len-1:0], len) -} -dst[7:0] := REDUCE_MAX(a, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MAX(src[8*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*8 - IF k[j] - tmp[i+7:i] := a[i+7:i] - ELSE - tmp[i+7:i] := 0 - FI -ENDFOR -dst[7:0] := REDUCE_MAX(tmp, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 8-bit integers in "a" by maximum. Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MAX(src[8*len-1:0], len) -} -dst[7:0] := REDUCE_MAX(a, 32) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 8-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MAX(src[8*len-1:0], len) -} -tmp := a -FOR j := 0 to 31 - i := j*8 - IF k[j] - tmp[i+7:i] := a[i+7:i] - ELSE - tmp[i+7:i] := 0 - FI -ENDFOR -dst[7:0] := REDUCE_MAX(tmp, 32) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed signed 16-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MIN(src[16*len-1:0], len) -} -dst[15:0] := REDUCE_MIN(a, 8) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MIN(src[16*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*16 - IF k[j] - tmp[i+15:i] := a[i+15:i] - ELSE - tmp[i+15:i] := Int16(0x7FFF) - FI -ENDFOR -dst[15:0] := REDUCE_MIN(tmp, 8) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed signed 16-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MIN(src[16*len-1:0], len) -} -dst[15:0] := REDUCE_MIN(a, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MIN(src[16*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*16 - IF k[j] - tmp[i+15:i] := a[i+15:i] - ELSE - tmp[i+15:i] := Int16(0x7FFF) - FI -ENDFOR -dst[15:0] := REDUCE_MIN(tmp, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed signed 8-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MIN(src[8*len-1:0], len) -} -dst[7:0] := REDUCE_MIN(a, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MIN(src[8*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*8 - IF k[j] - tmp[i+7:i] := a[i+7:i] - ELSE - tmp[i+7:i] := Int8(0x7F) - FI -ENDFOR -dst[7:0] := REDUCE_MIN(tmp, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed signed 8-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MIN(src[8*len-1:0], len) -} -dst[7:0] := REDUCE_MIN(a, 32) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MIN(src[8*len-1:0], len) -} -tmp := a -FOR j := 0 to 31 - i := j*8 - IF k[j] - tmp[i+7:i] := a[i+7:i] - ELSE - tmp[i+7:i] := Int8(0x7F) - FI -ENDFOR -dst[7:0] := REDUCE_MIN(tmp, 32) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 16-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MIN(src[16*len-1:0], len) -} -dst[15:0] := REDUCE_MIN(a, 8) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MIN(src[16*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*16 - IF k[j] - tmp[i+15:i] := a[i+15:i] - ELSE - tmp[i+15:i] := 0xFFFF - FI -ENDFOR -dst[15:0] := REDUCE_MIN(tmp, 8) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 16-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MIN(src[16*len-1:0], len) -} -dst[15:0] := REDUCE_MIN(a, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 16-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*16 - src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len]) - ENDFOR - RETURN REDUCE_MIN(src[16*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*16 - IF k[j] - tmp[i+15:i] := a[i+15:i] - ELSE - tmp[i+15:i] := 0xFFFF - FI -ENDFOR -dst[15:0] := REDUCE_MIN(tmp, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 8-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MIN(src[8*len-1:0], len) -} -dst[7:0] := REDUCE_MIN(a, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MIN(src[8*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*8 - IF k[j] - tmp[i+7:i] := a[i+7:i] - ELSE - tmp[i+7:i] := 0xFF - FI -ENDFOR -dst[7:0] := REDUCE_MIN(tmp, 16) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 8-bit integers in "a" by minimum. Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MIN(src[8*len-1:0], len) -} -dst[7:0] := REDUCE_MIN(a, 32) - - AVX512BW - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 8-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*8 - src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len]) - ENDFOR - RETURN REDUCE_MIN(src[8*len-1:0], len) -} -tmp := a -FOR j := 0 to 31 - i := j*8 - IF k[j] - tmp[i+7:i] := a[i+7:i] - ELSE - tmp[i+7:i] := 0xFF - FI -ENDFOR -dst[7:0] := REDUCE_MIN(tmp, 32) - - AVX512BW - AVX512VL -
immintrin.h
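A worked C illustration of the REDUCE_MAX/REDUCE_MIN pseudocode above; this is a minimal sketch assuming a recent compiler whose immintrin.h provides the AVX512BW/AVX512VL byte-reduction sequence intrinsics, and the wrapper names and 32-byte buffers are hypothetical:

```c
#include <immintrin.h>
#include <stdint.h>

/* Horizontal maximum of 32 signed bytes, matching REDUCE_MAX(a, 32). */
int8_t max_of_32(const int8_t buf[32]) {
    __m256i v = _mm256_loadu_si256((const __m256i *)buf);
    return (int8_t)_mm256_reduce_max_epi8(v);
}

/* Masked horizontal minimum: inactive lanes are filled with 0xFF first,
   exactly as in the masked REDUCE_MIN pseudocode, so they never win. */
uint8_t min_of_active(const uint8_t buf[32], __mmask32 active) {
    __m256i v = _mm256_loadu_si256((const __m256i *)buf);
    return (uint8_t)_mm256_mask_reduce_min_epu8(active, v);
}
```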
- Special Math Functions -
- - - - - - Unpack and interleave 32 bits from masks "a" and "b", and store the 64-bit result in "dst". - -dst[31:0] := b[31:0] -dst[63:32] := a[31:0] -dst[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - Unpack and interleave 16 bits from masks "a" and "b", and store the 32-bit result in "dst". - -dst[15:0] := b[15:0] -dst[31:16] := a[15:0] -dst[MAX:32] := 0 - - - AVX512BW -
immintrin.h
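A minimal sketch of the mask-unpack operations above, assuming AVX512VL is also available for the 256-bit compares; `combine_byte_masks` and its parameters are hypothetical names:

```c
#include <immintrin.h>

/* Build one 64-bit byte mask from two 256-bit halves of a 512-bit
   buffer: the unpack places the second operand in the low 32 bits
   and the first operand in the high 32 bits, as described above. */
__mmask64 combine_byte_masks(__m256i lo_half, __m256i hi_half, __m256i limit) {
    __mmask32 k_lo = _mm256_cmplt_epu8_mask(lo_half, limit);
    __mmask32 k_hi = _mm256_cmplt_epu8_mask(hi_half, limit);
    return _mm512_kunpackd((__mmask64)k_hi, (__mmask64)k_lo);
}
```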
- Miscellaneous -
- - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". - Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. - -FOR i := 0 to 3 - tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] - tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] - tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] - tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] -ENDFOR -FOR j := 0 to 7 - i := j*64 - dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ - ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) - - dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ - ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) - - dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ - ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) - - dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ - ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. - -FOR i := 0 to 3 - tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] - tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] - tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] - tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] -ENDFOR -FOR j := 0 to 7 - i := j*64 - tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ - ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) - - tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ - ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) - - tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ - ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) - - tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ - ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) -ENDFOR -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. - -FOR i := 0 to 3 - tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ] - tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ] - tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ] - tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ] -ENDFOR -FOR j := 0 to 7 - i := j*64 - tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\ - ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24]) - - tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\ - ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32]) - - tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\ - ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40]) - - tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\ - ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48]) -ENDFOR -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
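A hedged sketch of the quadruplet SAD above: fixing `imm8 = 0` makes every 32-bit selector pick dword 0 of its 128-bit lane in "b", which keeps the selection step easy to trace; the wrapper name is illustrative:

```c
#include <immintrin.h>

/* Each 64-bit lane of the result holds four 16-bit SADs of "a"'s two
   byte quadruplets against the lane's selected quadruplet from "b",
   evaluated at successive 8-bit offsets (useful for motion search). */
__m512i dbsad_dword0(__m512i a, __m512i b) {
    return _mm512_dbsad_epu8(a, b, 0); /* imm8 must be a compile-time constant */
}
```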
- Miscellaneous -
- - - - - - Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". - -FOR j := 0 to 3 - i := j*128 - tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) - dst[i+127:i] := tmp[127:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - - Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*128 - tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) - tmp_dst[i+127:i] := tmp[127:0] -ENDFOR -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*128 - tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8) - tmp_dst[i+127:i] := tmp[127:0] -ENDFOR -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
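A small sketch of the concatenate-and-shift operation above; note that it works on 16-byte lane pairs, not across the whole 512-bit register (the wrapper name is illustrative):

```c
#include <immintrin.h>

/* For each 128-bit lane pair (a_lane:b_lane), shift right by 4 bytes
   and keep the low 16 bytes, per the pseudocode above. */
__m512i shift_pairs_right_by_4(__m512i a, __m512i b) {
    return _mm512_alignr_epi8(a, b, 4); /* imm8 is a compile-time constant */
}
```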
- Miscellaneous -
- - - - - - Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := b[i+7:i] - ELSE - dst[i+7:i] := a[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := b[i+15:i] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
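A short sketch of the byte blend: a compare produces the control mask, and the blend selects "b" where the mask bit is set; "replace NUL bytes with spaces" is an illustrative use:

```c
#include <immintrin.h>

/* Replace every 0x00 byte in a 64-byte chunk with a space. */
__m512i nul_to_space(__m512i text) {
    __mmask64 is_nul = _mm512_cmpeq_epi8_mask(text, _mm512_setzero_si512());
    return _mm512_mask_blend_epi8(is_nul, text, _mm512_set1_epi8(' '));
}
```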
- Miscellaneous -
- - - - Broadcast the low packed 8-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := a[7:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[7:0] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[7:0] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - Broadcast the low packed 16-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := a[15:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[15:0] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[15:0] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
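A minimal sketch of the word broadcast; when the value starts in a scalar register, `_mm512_set1_epi16` expresses the same thing more directly:

```c
#include <immintrin.h>

/* Copy the low 16-bit element of an XMM register to all 32 word lanes. */
__m512i splat_low_word(__m128i a) {
    return _mm512_broadcastw_epi16(a);
}
```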
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - off := 16*idx[i+4:i] - dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] - ELSE - dst[i+15:i] := idx[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - off := 16*idx[i+4:i] - dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - off := 16*idx[i+4:i] - dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - off := 16*idx[i+4:i] - dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off] -ENDFOR -dst[MAX:512] := 0 - - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - id := idx[i+4:i]*16 - IF k[j] - dst[i+15:i] := a[id+15:id] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - id := idx[i+4:i]*16 - IF k[j] - dst[i+15:i] := a[id+15:id] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - id := idx[i+4:i]*16 - dst[i+15:i] := a[id+15:id] -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
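A sketch of the single-source cross-lane word permute above (only the low 5 bits of each index are used); reversing all 32 word lanes is an illustrative index pattern, and `_mm512_permutex2var_epi16` is the two-source form where index bit 5 selects between "a" and "b":

```c
#include <immintrin.h>

/* Reverse the 32 word lanes of "v": output lane i takes input lane 31-i.
   _mm512_set_epi16 lists elements from e31 down to e0. */
__m512i reverse_words(__m512i v) {
    const __m512i rev = _mm512_set_epi16(
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
    return _mm512_permutexvar_epi16(rev, v);
}
```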
- Miscellaneous -
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a". - -FOR j := 0 to 63 - i := j*8 - IF a[i+7] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := 0xFF - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := 0xFFFF - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a". - -FOR j := 0 to 31 - i := j*16 - IF a[i+15] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
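A round-trip sketch of the two conversions above, vector MSBs to mask and mask back to 0xFF/0x00 bytes; the wrapper names are illustrative:

```c
#include <immintrin.h>
#include <stdint.h>

/* Extract the sign bit of each of the 64 bytes into a 64-bit integer. */
uint64_t byte_sign_bits(__m512i v) {
    return _cvtmask64_u64(_mm512_movepi8_mask(v));
}

/* Expand a 64-bit mask back into a vector of 0xFF/0x00 bytes. */
__m512i mask_to_bytes(__mmask64 k) {
    return _mm512_movm_epi8(k);
}
```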
- Miscellaneous -
- - - - - Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst". - -FOR j := 0 to 63 - i := j*8 - tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) -ENDFOR -FOR j := 0 to 7 - i := j*64 - dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \ - tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56] - dst[i+63:i+16] := 0 -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
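A typical use of the byte SAD above, as a minimal sketch: the eight 16-bit partial sums land in the low bits of the 64-bit elements, so a 64-bit horizontal add (an AVX512F sequence intrinsic) folds them into one scalar; `sad64` and its arguments are illustrative:

```c
#include <immintrin.h>
#include <stdint.h>

/* Total absolute difference between two 64-byte blocks. */
uint64_t sad64(const uint8_t *p, const uint8_t *q) {
    __m512i a = _mm512_loadu_si512(p);
    __m512i b = _mm512_loadu_si512(q);
    return (uint64_t)_mm512_reduce_add_epi64(_mm512_sad_epu8(a, b));
}
```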
- Miscellaneous -
- - - - - - - Shuffle 8-bit integers in "a" within 128-bit lanes using the control in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - IF b[i+7] == 1 - dst[i+7:i] := 0 - ELSE - index[5:0] := b[i+3:i] + (j & 0x30) - dst[i+7:i] := a[index*8+7:index*8] - FI - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Swizzle -
- - - - - - Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - IF b[i+7] == 1 - dst[i+7:i] := 0 - ELSE - index[5:0] := b[i+3:i] + (j & 0x30) - dst[i+7:i] := a[index*8+7:index*8] - FI - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Swizzle -
- - - - - Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - IF b[i+7] == 1 - dst[i+7:i] := 0 - ELSE - index[5:0] := b[i+3:i] + (j & 0x30) - dst[i+7:i] := a[index*8+7:index*8] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
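A sketch of the in-lane byte shuffle: indices select within each 128-bit lane, and a control byte with bit 7 set zeroes the destination byte instead; per-lane byte reversal is an illustrative pattern:

```c
#include <immintrin.h>

/* Reverse the 16 bytes inside every 128-bit lane. */
__m512i reverse_bytes_per_lane(__m512i v) {
    const __m128i rev = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8,
                                       7,  6,  5,  4,  3,  2, 1, 0);
    return _mm512_shuffle_epi8(v, _mm512_broadcast_i32x4(rev));
}
```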
- Swizzle -
- - - - - - - Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[63:0] := a[63:0] -tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] -tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] -tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] -tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] -tmp_dst[191:128] := a[191:128] -tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] -tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] -tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] -tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] -tmp_dst[319:256] := a[319:256] -tmp_dst[335:320] := (a >> (imm8[1:0] * 16))[335:320] -tmp_dst[351:336] := (a >> (imm8[3:2] * 16))[335:320] -tmp_dst[367:352] := (a >> (imm8[5:4] * 16))[335:320] -tmp_dst[383:368] := (a >> (imm8[7:6] * 16))[335:320] -tmp_dst[447:384] := a[447:384] -tmp_dst[463:448] := (a >> (imm8[1:0] * 16))[463:448] -tmp_dst[479:464] := (a >> (imm8[3:2] * 16))[463:448] -tmp_dst[495:480] := (a >> (imm8[5:4] * 16))[463:448] -tmp_dst[511:496] := (a >> (imm8[7:6] * 16))[463:448] -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[63:0] := a[63:0] -tmp_dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] -tmp_dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] -tmp_dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] -tmp_dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] -tmp_dst[191:128] := a[191:128] -tmp_dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] -tmp_dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] -tmp_dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] -tmp_dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] -tmp_dst[319:256] := a[319:256] -tmp_dst[335:320] := (a >> (imm8[1:0] * 16))[335:320] -tmp_dst[351:336] := (a >> (imm8[3:2] * 16))[335:320] -tmp_dst[367:352] := (a >> (imm8[5:4] * 16))[335:320] -tmp_dst[383:368] := (a >> (imm8[7:6] * 16))[335:320] -tmp_dst[447:384] := a[447:384] -tmp_dst[463:448] := (a >> (imm8[1:0] * 16))[463:448] -tmp_dst[479:464] := (a >> (imm8[3:2] * 16))[463:448] -tmp_dst[495:480] := (a >> (imm8[5:4] * 16))[463:448] -tmp_dst[511:496] := (a >> (imm8[7:6] * 16))[463:448] -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst". - -dst[63:0] := a[63:0] -dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] -dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] -dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] -dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] -dst[191:128] := a[191:128] -dst[207:192] := (a >> (imm8[1:0] * 16))[207:192] -dst[223:208] := (a >> (imm8[3:2] * 16))[207:192] -dst[239:224] := (a >> (imm8[5:4] * 16))[207:192] -dst[255:240] := (a >> (imm8[7:6] * 16))[207:192] -dst[319:256] := a[319:256] -dst[335:320] := (a >> (imm8[1:0] * 16))[335:320] -dst[351:336] := (a >> (imm8[3:2] * 16))[335:320] -dst[367:352] := (a >> (imm8[5:4] * 16))[335:320] -dst[383:368] := (a >> (imm8[7:6] * 16))[335:320] -dst[447:384] := a[447:384] -dst[463:448] := (a >> (imm8[1:0] * 16))[463:448] -dst[479:464] := (a >> (imm8[3:2] * 16))[463:448] -dst[495:480] := (a >> (imm8[5:4] * 16))[463:448] -dst[511:496] := (a >> (imm8[7:6] * 16))[463:448] -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] -tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] -tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] -tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] -tmp_dst[127:64] := a[127:64] -tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] -tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] -tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] -tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] -tmp_dst[255:192] := a[255:192] -tmp_dst[271:256] := (a >> (imm8[1:0] * 16))[271:256] -tmp_dst[287:272] := (a >> (imm8[3:2] * 16))[271:256] -tmp_dst[303:288] := (a >> (imm8[5:4] * 16))[271:256] -tmp_dst[319:304] := (a >> (imm8[7:6] * 16))[271:256] -tmp_dst[383:320] := a[383:320] -tmp_dst[399:384] := (a >> (imm8[1:0] * 16))[399:384] -tmp_dst[415:400] := (a >> (imm8[3:2] * 16))[399:384] -tmp_dst[431:416] := (a >> (imm8[5:4] * 16))[399:384] -tmp_dst[447:432] := (a >> (imm8[7:6] * 16))[399:384] -tmp_dst[511:448] := a[511:448] -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] -tmp_dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] -tmp_dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] -tmp_dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] -tmp_dst[127:64] := a[127:64] -tmp_dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] -tmp_dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] -tmp_dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] -tmp_dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] -tmp_dst[255:192] := a[255:192] -tmp_dst[271:256] := (a >> (imm8[1:0] * 16))[271:256] -tmp_dst[287:272] := (a >> (imm8[3:2] * 16))[271:256] -tmp_dst[303:288] := (a >> (imm8[5:4] * 16))[271:256] -tmp_dst[319:304] := (a >> (imm8[7:6] * 16))[271:256] -tmp_dst[383:320] := a[383:320] -tmp_dst[399:384] := (a >> (imm8[1:0] * 16))[399:384] -tmp_dst[415:400] := (a >> (imm8[3:2] * 16))[399:384] -tmp_dst[431:416] := (a >> (imm8[5:4] * 16))[399:384] -tmp_dst[447:432] := (a >> (imm8[7:6] * 16))[399:384] -tmp_dst[511:448] := a[511:448] -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst". - -dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] -dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] -dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] -dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] -dst[127:64] := a[127:64] -dst[143:128] := (a >> (imm8[1:0] * 16))[143:128] -dst[159:144] := (a >> (imm8[3:2] * 16))[143:128] -dst[175:160] := (a >> (imm8[5:4] * 16))[143:128] -dst[191:176] := (a >> (imm8[7:6] * 16))[143:128] -dst[255:192] := a[255:192] -dst[271:256] := (a >> (imm8[1:0] * 16))[271:256] -dst[287:272] := (a >> (imm8[3:2] * 16))[271:256] -dst[303:288] := (a >> (imm8[5:4] * 16))[271:256] -dst[319:304] := (a >> (imm8[7:6] * 16))[271:256] -dst[383:320] := a[383:320] -dst[399:384] := (a >> (imm8[1:0] * 16))[399:384] -dst[415:400] := (a >> (imm8[3:2] * 16))[399:384] -dst[431:416] := (a >> (imm8[5:4] * 16))[399:384] -dst[447:432] := (a >> (imm8[7:6] * 16))[399:384] -dst[511:448] := a[511:448] -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
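A small sketch combining the two half-lane word shuffles above: the low shuffle rewrites words 0-3 of each 128-bit lane and the high shuffle words 4-7, so chaining both reverses each 64-bit group (the wrapper name is illustrative):

```c
#include <immintrin.h>

/* Reverse the four 16-bit words inside every 64-bit element. */
__m512i reverse_words_per_qword(__m512i v) {
    v = _mm512_shufflelo_epi16(v, _MM_SHUFFLE(0, 1, 2, 3));
    return _mm512_shufflehi_epi16(v, _MM_SHUFFLE(0, 1, 2, 3));
}
```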
- Miscellaneous -
- - - - - - - Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[71:64] - dst[15:8] := src2[71:64] - dst[23:16] := src1[79:72] - dst[31:24] := src2[79:72] - dst[39:32] := src1[87:80] - dst[47:40] := src2[87:80] - dst[55:48] := src1[95:88] - dst[63:56] := src2[95:88] - dst[71:64] := src1[103:96] - dst[79:72] := src2[103:96] - dst[87:80] := src1[111:104] - dst[95:88] := src2[111:104] - dst[103:96] := src1[119:112] - dst[111:104] := src2[119:112] - dst[119:112] := src1[127:120] - dst[127:120] := src2[127:120] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384]) -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[71:64] - dst[15:8] := src2[71:64] - dst[23:16] := src1[79:72] - dst[31:24] := src2[79:72] - dst[39:32] := src1[87:80] - dst[47:40] := src2[87:80] - dst[55:48] := src1[95:88] - dst[63:56] := src2[95:88] - dst[71:64] := src1[103:96] - dst[79:72] := src2[103:96] - dst[87:80] := src1[111:104] - dst[95:88] := src2[111:104] - dst[103:96] := src1[119:112] - dst[111:104] := src2[119:112] - dst[119:112] := src1[127:120] - dst[127:120] := src2[127:120] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384]) -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[71:64] - dst[15:8] := src2[71:64] - dst[23:16] := src1[79:72] - dst[31:24] := src2[79:72] - dst[39:32] := src1[87:80] - dst[47:40] := src2[87:80] - dst[55:48] := src1[95:88] - dst[63:56] := src2[95:88] - dst[71:64] := src1[103:96] - dst[79:72] := src2[103:96] - dst[87:80] := src1[111:104] - dst[95:88] := src2[111:104] - dst[103:96] := src1[119:112] - dst[111:104] := src2[119:112] - dst[119:112] := src1[127:120] - dst[127:120] := src2[127:120] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[79:64] - dst[31:16] := src2[79:64] - dst[47:32] := src1[95:80] - dst[63:48] := src2[95:80] - dst[79:64] := src1[111:96] - dst[95:80] := src2[111:96] - dst[111:96] := src1[127:112] - dst[127:112] := src2[127:112] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384]) -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[79:64] - dst[31:16] := src2[79:64] - dst[47:32] := src1[95:80] - dst[63:48] := src2[95:80] - dst[79:64] := src1[111:96] - dst[95:80] := src2[111:96] - dst[111:96] := src1[127:112] - dst[127:112] := src2[127:112] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384]) -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[79:64] - dst[31:16] := src2[79:64] - dst[47:32] := src1[95:80] - dst[63:48] := src2[95:80] - dst[79:64] := src1[111:96] - dst[95:80] := src2[111:96] - dst[111:96] := src1[127:112] - dst[127:112] := src2[127:112] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - dst[71:64] := src1[39:32] - dst[79:72] := src2[39:32] - dst[87:80] := src1[47:40] - dst[95:88] := src2[47:40] - dst[103:96] := src1[55:48] - dst[111:104] := src2[55:48] - dst[119:112] := src1[63:56] - dst[127:120] := src2[63:56] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384]) -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - dst[71:64] := src1[39:32] - dst[79:72] := src2[39:32] - dst[87:80] := src1[47:40] - dst[95:88] := src2[47:40] - dst[103:96] := src1[55:48] - dst[111:104] := src2[55:48] - dst[119:112] := src1[63:56] - dst[127:120] := src2[63:56] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384]) -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - dst[71:64] := src1[39:32] - dst[79:72] := src2[39:32] - dst[87:80] := src1[47:40] - dst[95:88] := src2[47:40] - dst[103:96] := src1[55:48] - dst[111:104] := src2[55:48] - dst[119:112] := src1[63:56] - dst[127:120] := src2[63:56] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - dst[79:64] := src1[47:32] - dst[95:80] := src2[47:32] - dst[111:96] := src1[63:48] - dst[127:112] := src2[63:48] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384]) -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - dst[79:64] := src1[47:32] - dst[95:80] := src2[47:32] - dst[111:96] := src1[63:48] - dst[127:112] := src2[63:48] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384]) -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Miscellaneous -
- - - - - Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - dst[79:64] := src1[47:32] - dst[95:80] := src2[47:32] - dst[111:96] := src1[63:48] - dst[127:112] := src2[63:48] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
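A sketch of a common unpack idiom: interleaving with zeros zero-extends bytes to words. Because the unpacks work within 128-bit lanes, the two outputs are lane-interleaved rather than in strict array order; the wrapper name is illustrative:

```c
#include <immintrin.h>

/* Widen 64 bytes to 64 zero-extended words, as two vectors of 32 words. */
void widen_bytes(__m512i v, __m512i *lo_words, __m512i *hi_words) {
    __m512i z = _mm512_setzero_si512();
    *lo_words = _mm512_unpacklo_epi8(v, z); /* bytes 0..7 of each 128-bit lane  */
    *hi_words = _mm512_unpackhi_epi8(v, z); /* bytes 8..15 of each 128-bit lane */
}
```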
- Miscellaneous -
- - - - - - Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Load -
- - - - - Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Load -
- - - - - - Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Load -
- - - - - Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Load -
- - - - Load 512-bits (composed of 32 packed 16-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Load -
- - - - Load 512-bits (composed of 64 packed 8-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Load -
- - - - Load 32-bit mask from memory into "k". - -k[31:0] := MEM[mem_addr+31:mem_addr] - - - AVX512BW -
immintrin.h
- Load -
- - - - Load 64-bit mask from memory into "k". - -k[63:0] := MEM[mem_addr+63:mem_addr] - - - AVX512BW -
immintrin.h
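The masked loads above are the standard way to handle buffer tails without reading past the end; a minimal sketch (`load_tail` and its parameters are hypothetical):

```c
#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

/* Load the first n bytes (n <= 64) of a buffer; lanes past n are zeroed
   and, crucially, their memory is never touched, so no stray page faults. */
__m512i load_tail(const uint8_t *p, size_t n) {
    __mmask64 k = (n >= 64) ? ~(__mmask64)0 : (((__mmask64)1 << n) - 1);
    return _mm512_maskz_loadu_epi8(k, p);
}
```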
- Load -
- - - - - - Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Move -
- - - - - Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Move -
- - - - - - Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Move -
- - - - - Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
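A one-line sketch of the masked register move: it is the register-to-register analogue of the masked loads and stores, here used to zero the inactive byte lanes (the wrapper name is illustrative):

```c
#include <immintrin.h>

/* Keep bytes whose mask bit is set; zero the rest. */
__m512i keep_active_bytes(__m512i v, __mmask64 active) {
    return _mm512_maskz_mov_epi8(active, v);
}
```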
- Move -
- - - - - - Store packed 16-bit integers from "a" into memory using writemask "k". - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 31 - i := j*16 - IF k[j] - MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i] - FI -ENDFOR - - - AVX512BW -
immintrin.h
- Store -
- - - - - - Store packed 8-bit integers from "a" into memory using writemask "k". - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 63 - i := j*8 - IF k[j] - MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] - FI -ENDFOR - - - AVX512BW -
immintrin.h
- Store -
- - - - - Store 512-bits (composed of 32 packed 16-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512BW -
immintrin.h
- Store -
- - - - - Store 512-bits (composed of 64 packed 8-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512BW -
immintrin.h
- Store -
- - - - - Store 32-bit mask from "a" into memory. - -MEM[mem_addr+31:mem_addr] := a[31:0] - - - AVX512BW -
immintrin.h
- Store -
- - - - - Store 64-bit mask from "a" into memory. - -MEM[mem_addr+63:mem_addr] := a[63:0] - - - AVX512BW -
immintrin.h
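The counterpart of the masked tail load sketched earlier, using the masked byte store above; `store_tail` is a hypothetical wrapper:

```c
#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

/* Write back only the first n bytes (n <= 64); masked-off lanes leave
   the destination memory untouched. */
void store_tail(uint8_t *p, size_t n, __m512i v) {
    __mmask64 k = (n >= 64) ? ~(__mmask64)0 : (((__mmask64)1 << n) - 1);
    _mm512_mask_storeu_epi8(p, k, v);
}
```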
- Store -
- - - - Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := ABS(a[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := ABS(a[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := ABS(a[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ABS(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ABS(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ABS(a[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
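A small sketch built on the byte abs above; the caveat in the comment follows from the wrapping subtraction rather than from the abs itself:

```c
#include <immintrin.h>

/* Absolute difference of signed bytes as abs(a - b). The subtraction
   wraps modulo 256, so the result is only exact when a - b fits in
   8 bits; for unsigned data prefer the SAD or max-minus-min idioms. */
__m512i abs_diff_epi8(__m512i a, __m512i b) {
    return _mm512_abs_epi8(_mm512_sub_epi8(a, b));
}
```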
- Arithmetic -
- - - - - Add packed 8-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := a[i+7:i] + b[i+7:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] + b[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] + b[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Add packed 16-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := a[i+15:i] + b[i+15:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[i+15:i] + b[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
Add packed 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*16
    IF k[j]
        dst[i+15:i] := a[i+15:i] + b[i+15:i]
    ELSE
        dst[i+15:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512BW
immintrin.h
Arithmetic
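The difference between the wrapping and saturating adds above is easiest to see at the type boundaries. A short sketch, assuming the `_mm512_add_epi8`/`_mm512_adds_epi8`/`_mm512_adds_epu8` names for these entries:

```c
#include <immintrin.h>

/* Wrapping vs. saturating byte addition, per the FOR loops above. */
static void add_flavours(signed char out_wrap[64], signed char out_sat[64],
                         unsigned char out_usat[64]) {
    __m512i a = _mm512_set1_epi8(120);
    __m512i b = _mm512_set1_epi8(10);

    /* 120 + 10 = 130: wraps to -126 as a signed byte */
    _mm512_storeu_si512(out_wrap, _mm512_add_epi8(a, b));
    /* Saturate8(130) clamps to 127 */
    _mm512_storeu_si512(out_sat, _mm512_adds_epi8(a, b));

    __m512i ua = _mm512_set1_epi8((char)250);
    /* SaturateU8(250 + 10) clamps to 255 */
    _mm512_storeu_si512(out_usat, _mm512_adds_epu8(ua, b));
}
```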
- - - - - Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 31
    i := j*16
    IF k[j]
        dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1
    ELSE
        dst[i+15:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512BW
immintrin.h
Arithmetic
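The `(a + b + 1) >> 1` in the pseudocode above is a round-half-up average computed in a wider intermediate, so the sum cannot overflow. A sketch assuming the `_mm512_avg_epu8` name:

```c
#include <immintrin.h>

static unsigned char avg_first_lane(void) {
    __m512i a = _mm512_set1_epi8((char)255);
    __m512i b = _mm512_set1_epi8((char)254);
    /* (255 + 254 + 1) >> 1 = 255: the +1 rounds halves upward, and the
       9-bit intermediate keeps the sum from wrapping. */
    __m512i avg = _mm512_avg_epu8(a, b);
    unsigned char out[64];
    _mm512_storeu_si512(out, avg);
    return out[0]; /* 255 */
}
```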
- - - - - Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512BW
immintrin.h
Arithmetic
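Together, the two multiply-add families above form the classic u8 x s8 dot-product kernel: the first stage pairs bytes into saturated 16-bit sums, the second pairs words into 32-bit sums. A sketch assuming the `_mm512_maddubs_epi16` and `_mm512_madd_epi16` names:

```c
#include <immintrin.h>

/* Sums of 4-byte groups of (unsigned a) * (signed b), one 32-bit sum per group. */
static __m512i dot_u8s8(__m512i a_u8, __m512i b_s8) {
    /* Stage 1: a[2i]*b[2i] + a[2i+1]*b[2i+1], saturated to signed 16 bits.
       Saturation can only trigger when both byte products are near the s16 limit. */
    __m512i pairs = _mm512_maddubs_epi16(a_u8, b_s8);
    /* Stage 2: widen and add adjacent 16-bit pairs into 32-bit lanes. */
    return _mm512_madd_epi16(pairs, _mm512_set1_epi16(1));
}
```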
- - - - - - - Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst".

FOR j := 0 to 31
    i := j*16
    dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
ENDFOR
dst[MAX:512] := 0

AVX512BW
immintrin.h
Arithmetic
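The min/max entries above compose directly into a branch-free clamp. A sketch assuming the `_mm512_max_epi16`/`_mm512_min_epi16` names:

```c
#include <immintrin.h>

/* Clamp each signed 16-bit lane of x into [lo, hi], lane-wise and branch-free. */
static __m512i clamp_epi16(__m512i x, short lo, short hi) {
    x = _mm512_max_epi16(x, _mm512_set1_epi16(lo));    /* raise lanes below lo */
    return _mm512_min_epi16(x, _mm512_set1_epi16(hi)); /* lower lanes above hi */
}
```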
- - - - - - - Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 - dst[i+15:i] := tmp[16:1] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 - dst[i+15:i] := tmp[16:1] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst".

FOR j := 0 to 31
    i := j*16
    tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1
    dst[i+15:i] := tmp[16:1]
ENDFOR
dst[MAX:512] := 0

AVX512BW
immintrin.h
Arithmetic
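The ">> 14, add 1, take bits [16:1]" sequence above is exactly a rounded Q15 fixed-point multiply, round(a*b / 2^15). A sketch assuming the `_mm512_mulhrs_epi16` name:

```c
#include <immintrin.h>

/* Rounded Q15 multiply: each lane computes round(a * b / 32768). */
static short q15_mul_first_lane(void) {
    __m512i half    = _mm512_set1_epi16(16384); /* 0.5 in Q15 */
    __m512i quarter = _mm512_mulhrs_epi16(half, half);
    short out[32];
    _mm512_storeu_si512(out, quarter);
    return out[0]; /* 8192, i.e. 0.25 in Q15 */
}
```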
- - - - - - - Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := a[i+15:i] * b[i+15:i] - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := a[i+15:i] * b[i+15:i] - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". - -FOR j := 0 to 31 - i := j*16 - tmp[31:0] := a[i+15:i] * b[i+15:i] - dst[i+15:i] := tmp[31:16] -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". - -FOR j := 0 to 31 - i := j*16 - tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) - dst[i+15:i] := tmp[31:16] -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) - dst[i+15:i] := tmp[15:0] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) - dst[i+15:i] := tmp[15:0] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst".

FOR j := 0 to 31
    i := j*16
    tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
    dst[i+15:i] := tmp[15:0]
ENDFOR
dst[MAX:512] := 0

AVX512BW
immintrin.h
Arithmetic
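The low-half and high-half multiplies above are two halves of the same 32-bit product; interleaving them reconstructs it. A sketch assuming the `_mm512_mullo_epi16`/`_mm512_mulhi_epi16` names (note that `_mm512_unpacklo_epi16` interleaves within each 128-bit lane, so the 32-bit products come out in lane-local order):

```c
#include <immintrin.h>

/* Full 32-bit products of the low four word pairs of each 128-bit lane. */
static __m512i mul_widen_lo(__m512i a, __m512i b) {
    __m512i lo = _mm512_mullo_epi16(a, b); /* bits 15:0 of each product  */
    __m512i hi = _mm512_mulhi_epi16(a, b); /* bits 31:16 of each product */
    /* Interleave low/high halves: each 32-bit lane becomes hi:lo. */
    return _mm512_unpacklo_epi16(lo, hi);
}
```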
- - - - - - - Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] - b[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[i+7:i] - b[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := a[i+7:i] - b[i+7:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[i+15:i] - b[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[i+15:i] - b[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Arithmetic -
Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst".

FOR j := 0 to 31
    i := j*16
    dst[i+15:i] := a[i+15:i] - b[i+15:i]
ENDFOR
dst[MAX:512] := 0

AVX512BW
immintrin.h
Arithmetic
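Because the unsigned saturating subtraction above clamps at zero, OR-ing the two one-sided differences gives the classic branch-free absolute difference of unsigned bytes. A sketch assuming the `_mm512_subs_epu8` name:

```c
#include <immintrin.h>

/* |a - b| per unsigned byte lane: one of the two saturating differences
   is always 0, the other is the true distance. */
static __m512i absdiff_epu8(__m512i a, __m512i b) {
    return _mm512_or_si512(_mm512_subs_epu8(a, b),
                           _mm512_subs_epu8(b, a));
}
```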
- - Miscellaneous - - - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[15:0] := Saturate16(a[31:0]) -tmp_dst[31:16] := Saturate16(a[63:32]) -tmp_dst[47:32] := Saturate16(a[95:64]) -tmp_dst[63:48] := Saturate16(a[127:96]) -tmp_dst[79:64] := Saturate16(b[31:0]) -tmp_dst[95:80] := Saturate16(b[63:32]) -tmp_dst[111:96] := Saturate16(b[95:64]) -tmp_dst[127:112] := Saturate16(b[127:96]) -tmp_dst[143:128] := Saturate16(a[159:128]) -tmp_dst[159:144] := Saturate16(a[191:160]) -tmp_dst[175:160] := Saturate16(a[223:192]) -tmp_dst[191:176] := Saturate16(a[255:224]) -tmp_dst[207:192] := Saturate16(b[159:128]) -tmp_dst[223:208] := Saturate16(b[191:160]) -tmp_dst[239:224] := Saturate16(b[223:192]) -tmp_dst[255:240] := Saturate16(b[255:224]) -tmp_dst[271:256] := Saturate16(a[287:256]) -tmp_dst[287:272] := Saturate16(a[319:288]) -tmp_dst[303:288] := Saturate16(a[351:320]) -tmp_dst[319:304] := Saturate16(a[383:352]) -tmp_dst[335:320] := Saturate16(b[287:256]) -tmp_dst[351:336] := Saturate16(b[319:288]) -tmp_dst[367:352] := Saturate16(b[351:320]) -tmp_dst[383:368] := Saturate16(b[383:352]) -tmp_dst[399:384] := Saturate16(a[415:384]) -tmp_dst[415:400] := Saturate16(a[447:416]) -tmp_dst[431:416] := Saturate16(a[479:448]) -tmp_dst[447:432] := Saturate16(a[511:480]) -tmp_dst[463:448] := Saturate16(b[415:384]) -tmp_dst[479:464] := Saturate16(b[447:416]) -tmp_dst[495:480] := Saturate16(b[479:448]) -tmp_dst[511:496] := Saturate16(b[511:480]) -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[15:0] := Saturate16(a[31:0]) -tmp_dst[31:16] := Saturate16(a[63:32]) -tmp_dst[47:32] := Saturate16(a[95:64]) -tmp_dst[63:48] := Saturate16(a[127:96]) -tmp_dst[79:64] := Saturate16(b[31:0]) -tmp_dst[95:80] := Saturate16(b[63:32]) -tmp_dst[111:96] := Saturate16(b[95:64]) -tmp_dst[127:112] := Saturate16(b[127:96]) -tmp_dst[143:128] := Saturate16(a[159:128]) -tmp_dst[159:144] := Saturate16(a[191:160]) -tmp_dst[175:160] := Saturate16(a[223:192]) -tmp_dst[191:176] := Saturate16(a[255:224]) -tmp_dst[207:192] := Saturate16(b[159:128]) -tmp_dst[223:208] := Saturate16(b[191:160]) -tmp_dst[239:224] := Saturate16(b[223:192]) -tmp_dst[255:240] := Saturate16(b[255:224]) -tmp_dst[271:256] := Saturate16(a[287:256]) -tmp_dst[287:272] := Saturate16(a[319:288]) -tmp_dst[303:288] := Saturate16(a[351:320]) -tmp_dst[319:304] := Saturate16(a[383:352]) -tmp_dst[335:320] := Saturate16(b[287:256]) -tmp_dst[351:336] := Saturate16(b[319:288]) -tmp_dst[367:352] := Saturate16(b[351:320]) -tmp_dst[383:368] := Saturate16(b[383:352]) -tmp_dst[399:384] := Saturate16(a[415:384]) -tmp_dst[415:400] := Saturate16(a[447:416]) -tmp_dst[431:416] := Saturate16(a[479:448]) -tmp_dst[447:432] := Saturate16(a[511:480]) -tmp_dst[463:448] := Saturate16(b[415:384]) -tmp_dst[479:464] := Saturate16(b[447:416]) -tmp_dst[495:480] := Saturate16(b[479:448]) -tmp_dst[511:496] := Saturate16(b[511:480]) -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". - -dst[15:0] := Saturate16(a[31:0]) -dst[31:16] := Saturate16(a[63:32]) -dst[47:32] := Saturate16(a[95:64]) -dst[63:48] := Saturate16(a[127:96]) -dst[79:64] := Saturate16(b[31:0]) -dst[95:80] := Saturate16(b[63:32]) -dst[111:96] := Saturate16(b[95:64]) -dst[127:112] := Saturate16(b[127:96]) -dst[143:128] := Saturate16(a[159:128]) -dst[159:144] := Saturate16(a[191:160]) -dst[175:160] := Saturate16(a[223:192]) -dst[191:176] := Saturate16(a[255:224]) -dst[207:192] := Saturate16(b[159:128]) -dst[223:208] := Saturate16(b[191:160]) -dst[239:224] := Saturate16(b[223:192]) -dst[255:240] := Saturate16(b[255:224]) -dst[271:256] := Saturate16(a[287:256]) -dst[287:272] := Saturate16(a[319:288]) -dst[303:288] := Saturate16(a[351:320]) -dst[319:304] := Saturate16(a[383:352]) -dst[335:320] := Saturate16(b[287:256]) -dst[351:336] := Saturate16(b[319:288]) -dst[367:352] := Saturate16(b[351:320]) -dst[383:368] := Saturate16(b[383:352]) -dst[399:384] := Saturate16(a[415:384]) -dst[415:400] := Saturate16(a[447:416]) -dst[431:416] := Saturate16(a[479:448]) -dst[447:432] := Saturate16(a[511:480]) -dst[463:448] := Saturate16(b[415:384]) -dst[479:464] := Saturate16(b[447:416]) -dst[495:480] := Saturate16(b[479:448]) -dst[511:496] := Saturate16(b[511:480]) -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[7:0] := Saturate8(a[15:0]) -tmp_dst[15:8] := Saturate8(a[31:16]) -tmp_dst[23:16] := Saturate8(a[47:32]) -tmp_dst[31:24] := Saturate8(a[63:48]) -tmp_dst[39:32] := Saturate8(a[79:64]) -tmp_dst[47:40] := Saturate8(a[95:80]) -tmp_dst[55:48] := Saturate8(a[111:96]) -tmp_dst[63:56] := Saturate8(a[127:112]) -tmp_dst[71:64] := Saturate8(b[15:0]) -tmp_dst[79:72] := Saturate8(b[31:16]) -tmp_dst[87:80] := Saturate8(b[47:32]) -tmp_dst[95:88] := Saturate8(b[63:48]) -tmp_dst[103:96] := Saturate8(b[79:64]) -tmp_dst[111:104] := Saturate8(b[95:80]) -tmp_dst[119:112] := Saturate8(b[111:96]) -tmp_dst[127:120] := Saturate8(b[127:112]) -tmp_dst[135:128] := Saturate8(a[143:128]) -tmp_dst[143:136] := Saturate8(a[159:144]) -tmp_dst[151:144] := Saturate8(a[175:160]) -tmp_dst[159:152] := Saturate8(a[191:176]) -tmp_dst[167:160] := Saturate8(a[207:192]) -tmp_dst[175:168] := Saturate8(a[223:208]) -tmp_dst[183:176] := Saturate8(a[239:224]) -tmp_dst[191:184] := Saturate8(a[255:240]) -tmp_dst[199:192] := Saturate8(b[143:128]) -tmp_dst[207:200] := Saturate8(b[159:144]) -tmp_dst[215:208] := Saturate8(b[175:160]) -tmp_dst[223:216] := Saturate8(b[191:176]) -tmp_dst[231:224] := Saturate8(b[207:192]) -tmp_dst[239:232] := Saturate8(b[223:208]) -tmp_dst[247:240] := Saturate8(b[239:224]) -tmp_dst[255:248] := Saturate8(b[255:240]) -tmp_dst[263:256] := Saturate8(a[271:256]) -tmp_dst[271:264] := Saturate8(a[287:272]) -tmp_dst[279:272] := Saturate8(a[303:288]) -tmp_dst[287:280] := Saturate8(a[319:304]) -tmp_dst[295:288] := Saturate8(a[335:320]) -tmp_dst[303:296] := Saturate8(a[351:336]) -tmp_dst[311:304] := Saturate8(a[367:352]) -tmp_dst[319:312] := Saturate8(a[383:368]) -tmp_dst[327:320] := Saturate8(b[271:256]) -tmp_dst[335:328] := Saturate8(b[287:272]) -tmp_dst[343:336] := Saturate8(b[303:288]) -tmp_dst[351:344] := Saturate8(b[319:304]) -tmp_dst[359:352] := Saturate8(b[335:320]) -tmp_dst[367:360] := Saturate8(b[351:336]) -tmp_dst[375:368] := Saturate8(b[367:352]) -tmp_dst[383:376] := Saturate8(b[383:368]) -tmp_dst[391:384] := Saturate8(a[399:384]) -tmp_dst[399:392] := Saturate8(a[415:400]) -tmp_dst[407:400] := Saturate8(a[431:416]) -tmp_dst[415:408] := Saturate8(a[447:432]) -tmp_dst[423:416] := Saturate8(a[463:448]) -tmp_dst[431:424] := Saturate8(a[479:464]) -tmp_dst[439:432] := Saturate8(a[495:480]) -tmp_dst[447:440] := Saturate8(a[511:496]) -tmp_dst[455:448] := Saturate8(b[399:384]) -tmp_dst[463:456] := Saturate8(b[415:400]) -tmp_dst[471:464] := Saturate8(b[431:416]) -tmp_dst[479:472] := Saturate8(b[447:432]) -tmp_dst[487:480] := Saturate8(b[463:448]) -tmp_dst[495:488] := Saturate8(b[479:464]) -tmp_dst[503:496] := Saturate8(b[495:480]) -tmp_dst[511:504] := Saturate8(b[511:496]) -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[7:0] := Saturate8(a[15:0]) -tmp_dst[15:8] := Saturate8(a[31:16]) -tmp_dst[23:16] := Saturate8(a[47:32]) -tmp_dst[31:24] := Saturate8(a[63:48]) -tmp_dst[39:32] := Saturate8(a[79:64]) -tmp_dst[47:40] := Saturate8(a[95:80]) -tmp_dst[55:48] := Saturate8(a[111:96]) -tmp_dst[63:56] := Saturate8(a[127:112]) -tmp_dst[71:64] := Saturate8(b[15:0]) -tmp_dst[79:72] := Saturate8(b[31:16]) -tmp_dst[87:80] := Saturate8(b[47:32]) -tmp_dst[95:88] := Saturate8(b[63:48]) -tmp_dst[103:96] := Saturate8(b[79:64]) -tmp_dst[111:104] := Saturate8(b[95:80]) -tmp_dst[119:112] := Saturate8(b[111:96]) -tmp_dst[127:120] := Saturate8(b[127:112]) -tmp_dst[135:128] := Saturate8(a[143:128]) -tmp_dst[143:136] := Saturate8(a[159:144]) -tmp_dst[151:144] := Saturate8(a[175:160]) -tmp_dst[159:152] := Saturate8(a[191:176]) -tmp_dst[167:160] := Saturate8(a[207:192]) -tmp_dst[175:168] := Saturate8(a[223:208]) -tmp_dst[183:176] := Saturate8(a[239:224]) -tmp_dst[191:184] := Saturate8(a[255:240]) -tmp_dst[199:192] := Saturate8(b[143:128]) -tmp_dst[207:200] := Saturate8(b[159:144]) -tmp_dst[215:208] := Saturate8(b[175:160]) -tmp_dst[223:216] := Saturate8(b[191:176]) -tmp_dst[231:224] := Saturate8(b[207:192]) -tmp_dst[239:232] := Saturate8(b[223:208]) -tmp_dst[247:240] := Saturate8(b[239:224]) -tmp_dst[255:248] := Saturate8(b[255:240]) -tmp_dst[263:256] := Saturate8(a[271:256]) -tmp_dst[271:264] := Saturate8(a[287:272]) -tmp_dst[279:272] := Saturate8(a[303:288]) -tmp_dst[287:280] := Saturate8(a[319:304]) -tmp_dst[295:288] := Saturate8(a[335:320]) -tmp_dst[303:296] := Saturate8(a[351:336]) -tmp_dst[311:304] := Saturate8(a[367:352]) -tmp_dst[319:312] := Saturate8(a[383:368]) -tmp_dst[327:320] := Saturate8(b[271:256]) -tmp_dst[335:328] := Saturate8(b[287:272]) -tmp_dst[343:336] := Saturate8(b[303:288]) -tmp_dst[351:344] := Saturate8(b[319:304]) -tmp_dst[359:352] := Saturate8(b[335:320]) -tmp_dst[367:360] := Saturate8(b[351:336]) -tmp_dst[375:368] := Saturate8(b[367:352]) -tmp_dst[383:376] := Saturate8(b[383:368]) -tmp_dst[391:384] := Saturate8(a[399:384]) -tmp_dst[399:392] := Saturate8(a[415:400]) -tmp_dst[407:400] := Saturate8(a[431:416]) -tmp_dst[415:408] := Saturate8(a[447:432]) -tmp_dst[423:416] := Saturate8(a[463:448]) -tmp_dst[431:424] := Saturate8(a[479:464]) -tmp_dst[439:432] := Saturate8(a[495:480]) -tmp_dst[447:440] := Saturate8(a[511:496]) -tmp_dst[455:448] := Saturate8(b[399:384]) -tmp_dst[463:456] := Saturate8(b[415:400]) -tmp_dst[471:464] := Saturate8(b[431:416]) -tmp_dst[479:472] := Saturate8(b[447:432]) -tmp_dst[487:480] := Saturate8(b[463:448]) -tmp_dst[495:488] := Saturate8(b[479:464]) -tmp_dst[503:496] := Saturate8(b[495:480]) -tmp_dst[511:504] := Saturate8(b[511:496]) -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". - -dst[7:0] := Saturate8(a[15:0]) -dst[15:8] := Saturate8(a[31:16]) -dst[23:16] := Saturate8(a[47:32]) -dst[31:24] := Saturate8(a[63:48]) -dst[39:32] := Saturate8(a[79:64]) -dst[47:40] := Saturate8(a[95:80]) -dst[55:48] := Saturate8(a[111:96]) -dst[63:56] := Saturate8(a[127:112]) -dst[71:64] := Saturate8(b[15:0]) -dst[79:72] := Saturate8(b[31:16]) -dst[87:80] := Saturate8(b[47:32]) -dst[95:88] := Saturate8(b[63:48]) -dst[103:96] := Saturate8(b[79:64]) -dst[111:104] := Saturate8(b[95:80]) -dst[119:112] := Saturate8(b[111:96]) -dst[127:120] := Saturate8(b[127:112]) -dst[135:128] := Saturate8(a[143:128]) -dst[143:136] := Saturate8(a[159:144]) -dst[151:144] := Saturate8(a[175:160]) -dst[159:152] := Saturate8(a[191:176]) -dst[167:160] := Saturate8(a[207:192]) -dst[175:168] := Saturate8(a[223:208]) -dst[183:176] := Saturate8(a[239:224]) -dst[191:184] := Saturate8(a[255:240]) -dst[199:192] := Saturate8(b[143:128]) -dst[207:200] := Saturate8(b[159:144]) -dst[215:208] := Saturate8(b[175:160]) -dst[223:216] := Saturate8(b[191:176]) -dst[231:224] := Saturate8(b[207:192]) -dst[239:232] := Saturate8(b[223:208]) -dst[247:240] := Saturate8(b[239:224]) -dst[255:248] := Saturate8(b[255:240]) -dst[263:256] := Saturate8(a[271:256]) -dst[271:264] := Saturate8(a[287:272]) -dst[279:272] := Saturate8(a[303:288]) -dst[287:280] := Saturate8(a[319:304]) -dst[295:288] := Saturate8(a[335:320]) -dst[303:296] := Saturate8(a[351:336]) -dst[311:304] := Saturate8(a[367:352]) -dst[319:312] := Saturate8(a[383:368]) -dst[327:320] := Saturate8(b[271:256]) -dst[335:328] := Saturate8(b[287:272]) -dst[343:336] := Saturate8(b[303:288]) -dst[351:344] := Saturate8(b[319:304]) -dst[359:352] := Saturate8(b[335:320]) -dst[367:360] := Saturate8(b[351:336]) -dst[375:368] := Saturate8(b[367:352]) -dst[383:376] := Saturate8(b[383:368]) -dst[391:384] := Saturate8(a[399:384]) -dst[399:392] := Saturate8(a[415:400]) -dst[407:400] := Saturate8(a[431:416]) -dst[415:408] := Saturate8(a[447:432]) -dst[423:416] := Saturate8(a[463:448]) -dst[431:424] := Saturate8(a[479:464]) -dst[439:432] := Saturate8(a[495:480]) -dst[447:440] := Saturate8(a[511:496]) -dst[455:448] := Saturate8(b[399:384]) -dst[463:456] := Saturate8(b[415:400]) -dst[471:464] := Saturate8(b[431:416]) -dst[479:472] := Saturate8(b[447:432]) -dst[487:480] := Saturate8(b[463:448]) -dst[495:488] := Saturate8(b[479:464]) -dst[503:496] := Saturate8(b[495:480]) -dst[511:504] := Saturate8(b[511:496]) -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[15:0] := SaturateU16(a[31:0]) -tmp_dst[31:16] := SaturateU16(a[63:32]) -tmp_dst[47:32] := SaturateU16(a[95:64]) -tmp_dst[63:48] := SaturateU16(a[127:96]) -tmp_dst[79:64] := SaturateU16(b[31:0]) -tmp_dst[95:80] := SaturateU16(b[63:32]) -tmp_dst[111:96] := SaturateU16(b[95:64]) -tmp_dst[127:112] := SaturateU16(b[127:96]) -tmp_dst[143:128] := SaturateU16(a[159:128]) -tmp_dst[159:144] := SaturateU16(a[191:160]) -tmp_dst[175:160] := SaturateU16(a[223:192]) -tmp_dst[191:176] := SaturateU16(a[255:224]) -tmp_dst[207:192] := SaturateU16(b[159:128]) -tmp_dst[223:208] := SaturateU16(b[191:160]) -tmp_dst[239:224] := SaturateU16(b[223:192]) -tmp_dst[255:240] := SaturateU16(b[255:224]) -tmp_dst[271:256] := SaturateU16(a[287:256]) -tmp_dst[287:272] := SaturateU16(a[319:288]) -tmp_dst[303:288] := SaturateU16(a[351:320]) -tmp_dst[319:304] := SaturateU16(a[383:352]) -tmp_dst[335:320] := SaturateU16(b[287:256]) -tmp_dst[351:336] := SaturateU16(b[319:288]) -tmp_dst[367:352] := SaturateU16(b[351:320]) -tmp_dst[383:368] := SaturateU16(b[383:352]) -tmp_dst[399:384] := SaturateU16(a[415:384]) -tmp_dst[415:400] := SaturateU16(a[447:416]) -tmp_dst[431:416] := SaturateU16(a[479:448]) -tmp_dst[447:432] := SaturateU16(a[511:480]) -tmp_dst[463:448] := SaturateU16(b[415:384]) -tmp_dst[479:464] := SaturateU16(b[447:416]) -tmp_dst[495:480] := SaturateU16(b[479:448]) -tmp_dst[511:496] := SaturateU16(b[511:480]) -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[15:0] := SaturateU16(a[31:0]) -tmp_dst[31:16] := SaturateU16(a[63:32]) -tmp_dst[47:32] := SaturateU16(a[95:64]) -tmp_dst[63:48] := SaturateU16(a[127:96]) -tmp_dst[79:64] := SaturateU16(b[31:0]) -tmp_dst[95:80] := SaturateU16(b[63:32]) -tmp_dst[111:96] := SaturateU16(b[95:64]) -tmp_dst[127:112] := SaturateU16(b[127:96]) -tmp_dst[143:128] := SaturateU16(a[159:128]) -tmp_dst[159:144] := SaturateU16(a[191:160]) -tmp_dst[175:160] := SaturateU16(a[223:192]) -tmp_dst[191:176] := SaturateU16(a[255:224]) -tmp_dst[207:192] := SaturateU16(b[159:128]) -tmp_dst[223:208] := SaturateU16(b[191:160]) -tmp_dst[239:224] := SaturateU16(b[223:192]) -tmp_dst[255:240] := SaturateU16(b[255:224]) -tmp_dst[271:256] := SaturateU16(a[287:256]) -tmp_dst[287:272] := SaturateU16(a[319:288]) -tmp_dst[303:288] := SaturateU16(a[351:320]) -tmp_dst[319:304] := SaturateU16(a[383:352]) -tmp_dst[335:320] := SaturateU16(b[287:256]) -tmp_dst[351:336] := SaturateU16(b[319:288]) -tmp_dst[367:352] := SaturateU16(b[351:320]) -tmp_dst[383:368] := SaturateU16(b[383:352]) -tmp_dst[399:384] := SaturateU16(a[415:384]) -tmp_dst[415:400] := SaturateU16(a[447:416]) -tmp_dst[431:416] := SaturateU16(a[479:448]) -tmp_dst[447:432] := SaturateU16(a[511:480]) -tmp_dst[463:448] := SaturateU16(b[415:384]) -tmp_dst[479:464] := SaturateU16(b[447:416]) -tmp_dst[495:480] := SaturateU16(b[479:448]) -tmp_dst[511:496] := SaturateU16(b[511:480]) -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := tmp_dst[i+15:i] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst". - -dst[15:0] := SaturateU16(a[31:0]) -dst[31:16] := SaturateU16(a[63:32]) -dst[47:32] := SaturateU16(a[95:64]) -dst[63:48] := SaturateU16(a[127:96]) -dst[79:64] := SaturateU16(b[31:0]) -dst[95:80] := SaturateU16(b[63:32]) -dst[111:96] := SaturateU16(b[95:64]) -dst[127:112] := SaturateU16(b[127:96]) -dst[143:128] := SaturateU16(a[159:128]) -dst[159:144] := SaturateU16(a[191:160]) -dst[175:160] := SaturateU16(a[223:192]) -dst[191:176] := SaturateU16(a[255:224]) -dst[207:192] := SaturateU16(b[159:128]) -dst[223:208] := SaturateU16(b[191:160]) -dst[239:224] := SaturateU16(b[223:192]) -dst[255:240] := SaturateU16(b[255:224]) -dst[271:256] := SaturateU16(a[287:256]) -dst[287:272] := SaturateU16(a[319:288]) -dst[303:288] := SaturateU16(a[351:320]) -dst[319:304] := SaturateU16(a[383:352]) -dst[335:320] := SaturateU16(b[287:256]) -dst[351:336] := SaturateU16(b[319:288]) -dst[367:352] := SaturateU16(b[351:320]) -dst[383:368] := SaturateU16(b[383:352]) -dst[399:384] := SaturateU16(a[415:384]) -dst[415:400] := SaturateU16(a[447:416]) -dst[431:416] := SaturateU16(a[479:448]) -dst[447:432] := SaturateU16(a[511:480]) -dst[463:448] := SaturateU16(b[415:384]) -dst[479:464] := SaturateU16(b[447:416]) -dst[495:480] := SaturateU16(b[479:448]) -dst[511:496] := SaturateU16(b[511:480]) -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[7:0] := SaturateU8(a[15:0]) -tmp_dst[15:8] := SaturateU8(a[31:16]) -tmp_dst[23:16] := SaturateU8(a[47:32]) -tmp_dst[31:24] := SaturateU8(a[63:48]) -tmp_dst[39:32] := SaturateU8(a[79:64]) -tmp_dst[47:40] := SaturateU8(a[95:80]) -tmp_dst[55:48] := SaturateU8(a[111:96]) -tmp_dst[63:56] := SaturateU8(a[127:112]) -tmp_dst[71:64] := SaturateU8(b[15:0]) -tmp_dst[79:72] := SaturateU8(b[31:16]) -tmp_dst[87:80] := SaturateU8(b[47:32]) -tmp_dst[95:88] := SaturateU8(b[63:48]) -tmp_dst[103:96] := SaturateU8(b[79:64]) -tmp_dst[111:104] := SaturateU8(b[95:80]) -tmp_dst[119:112] := SaturateU8(b[111:96]) -tmp_dst[127:120] := SaturateU8(b[127:112]) -tmp_dst[135:128] := SaturateU8(a[143:128]) -tmp_dst[143:136] := SaturateU8(a[159:144]) -tmp_dst[151:144] := SaturateU8(a[175:160]) -tmp_dst[159:152] := SaturateU8(a[191:176]) -tmp_dst[167:160] := SaturateU8(a[207:192]) -tmp_dst[175:168] := SaturateU8(a[223:208]) -tmp_dst[183:176] := SaturateU8(a[239:224]) -tmp_dst[191:184] := SaturateU8(a[255:240]) -tmp_dst[199:192] := SaturateU8(b[143:128]) -tmp_dst[207:200] := SaturateU8(b[159:144]) -tmp_dst[215:208] := SaturateU8(b[175:160]) -tmp_dst[223:216] := SaturateU8(b[191:176]) -tmp_dst[231:224] := SaturateU8(b[207:192]) -tmp_dst[239:232] := SaturateU8(b[223:208]) -tmp_dst[247:240] := SaturateU8(b[239:224]) -tmp_dst[255:248] := SaturateU8(b[255:240]) -tmp_dst[263:256] := SaturateU8(a[271:256]) -tmp_dst[271:264] := SaturateU8(a[287:272]) -tmp_dst[279:272] := SaturateU8(a[303:288]) -tmp_dst[287:280] := SaturateU8(a[319:304]) -tmp_dst[295:288] := SaturateU8(a[335:320]) -tmp_dst[303:296] := SaturateU8(a[351:336]) -tmp_dst[311:304] := SaturateU8(a[367:352]) -tmp_dst[319:312] := SaturateU8(a[383:368]) -tmp_dst[327:320] := SaturateU8(b[271:256]) -tmp_dst[335:328] := SaturateU8(b[287:272]) -tmp_dst[343:336] := SaturateU8(b[303:288]) -tmp_dst[351:344] := SaturateU8(b[319:304]) -tmp_dst[359:352] := SaturateU8(b[335:320]) -tmp_dst[367:360] := SaturateU8(b[351:336]) -tmp_dst[375:368] := SaturateU8(b[367:352]) -tmp_dst[383:376] := SaturateU8(b[383:368]) -tmp_dst[391:384] := SaturateU8(a[399:384]) -tmp_dst[399:392] := SaturateU8(a[415:400]) -tmp_dst[407:400] := SaturateU8(a[431:416]) -tmp_dst[415:408] := SaturateU8(a[447:432]) -tmp_dst[423:416] := SaturateU8(a[463:448]) -tmp_dst[431:424] := SaturateU8(a[479:464]) -tmp_dst[439:432] := SaturateU8(a[495:480]) -tmp_dst[447:440] := SaturateU8(a[511:496]) -tmp_dst[455:448] := SaturateU8(b[399:384]) -tmp_dst[463:456] := SaturateU8(b[415:400]) -tmp_dst[471:464] := SaturateU8(b[431:416]) -tmp_dst[479:472] := SaturateU8(b[447:432]) -tmp_dst[487:480] := SaturateU8(b[463:448]) -tmp_dst[495:488] := SaturateU8(b[479:464]) -tmp_dst[503:496] := SaturateU8(b[495:480]) -tmp_dst[511:504] := SaturateU8(b[511:496]) -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[7:0] := SaturateU8(a[15:0]) -tmp_dst[15:8] := SaturateU8(a[31:16]) -tmp_dst[23:16] := SaturateU8(a[47:32]) -tmp_dst[31:24] := SaturateU8(a[63:48]) -tmp_dst[39:32] := SaturateU8(a[79:64]) -tmp_dst[47:40] := SaturateU8(a[95:80]) -tmp_dst[55:48] := SaturateU8(a[111:96]) -tmp_dst[63:56] := SaturateU8(a[127:112]) -tmp_dst[71:64] := SaturateU8(b[15:0]) -tmp_dst[79:72] := SaturateU8(b[31:16]) -tmp_dst[87:80] := SaturateU8(b[47:32]) -tmp_dst[95:88] := SaturateU8(b[63:48]) -tmp_dst[103:96] := SaturateU8(b[79:64]) -tmp_dst[111:104] := SaturateU8(b[95:80]) -tmp_dst[119:112] := SaturateU8(b[111:96]) -tmp_dst[127:120] := SaturateU8(b[127:112]) -tmp_dst[135:128] := SaturateU8(a[143:128]) -tmp_dst[143:136] := SaturateU8(a[159:144]) -tmp_dst[151:144] := SaturateU8(a[175:160]) -tmp_dst[159:152] := SaturateU8(a[191:176]) -tmp_dst[167:160] := SaturateU8(a[207:192]) -tmp_dst[175:168] := SaturateU8(a[223:208]) -tmp_dst[183:176] := SaturateU8(a[239:224]) -tmp_dst[191:184] := SaturateU8(a[255:240]) -tmp_dst[199:192] := SaturateU8(b[143:128]) -tmp_dst[207:200] := SaturateU8(b[159:144]) -tmp_dst[215:208] := SaturateU8(b[175:160]) -tmp_dst[223:216] := SaturateU8(b[191:176]) -tmp_dst[231:224] := SaturateU8(b[207:192]) -tmp_dst[239:232] := SaturateU8(b[223:208]) -tmp_dst[247:240] := SaturateU8(b[239:224]) -tmp_dst[255:248] := SaturateU8(b[255:240]) -tmp_dst[263:256] := SaturateU8(a[271:256]) -tmp_dst[271:264] := SaturateU8(a[287:272]) -tmp_dst[279:272] := SaturateU8(a[303:288]) -tmp_dst[287:280] := SaturateU8(a[319:304]) -tmp_dst[295:288] := SaturateU8(a[335:320]) -tmp_dst[303:296] := SaturateU8(a[351:336]) -tmp_dst[311:304] := SaturateU8(a[367:352]) -tmp_dst[319:312] := SaturateU8(a[383:368]) -tmp_dst[327:320] := SaturateU8(b[271:256]) -tmp_dst[335:328] := SaturateU8(b[287:272]) -tmp_dst[343:336] := SaturateU8(b[303:288]) -tmp_dst[351:344] := SaturateU8(b[319:304]) -tmp_dst[359:352] := SaturateU8(b[335:320]) -tmp_dst[367:360] := SaturateU8(b[351:336]) -tmp_dst[375:368] := SaturateU8(b[367:352]) -tmp_dst[383:376] := SaturateU8(b[383:368]) -tmp_dst[391:384] := SaturateU8(a[399:384]) -tmp_dst[399:392] := SaturateU8(a[415:400]) -tmp_dst[407:400] := SaturateU8(a[431:416]) -tmp_dst[415:408] := SaturateU8(a[447:432]) -tmp_dst[423:416] := SaturateU8(a[463:448]) -tmp_dst[431:424] := SaturateU8(a[479:464]) -tmp_dst[439:432] := SaturateU8(a[495:480]) -tmp_dst[447:440] := SaturateU8(a[511:496]) -tmp_dst[455:448] := SaturateU8(b[399:384]) -tmp_dst[463:456] := SaturateU8(b[415:400]) -tmp_dst[471:464] := SaturateU8(b[431:416]) -tmp_dst[479:472] := SaturateU8(b[447:432]) -tmp_dst[487:480] := SaturateU8(b[463:448]) -tmp_dst[495:488] := SaturateU8(b[479:464]) -tmp_dst[503:496] := SaturateU8(b[495:480]) -tmp_dst[511:504] := SaturateU8(b[511:496]) -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := tmp_dst[i+7:i] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Miscellaneous - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". - -dst[7:0] := SaturateU8(a[15:0]) -dst[15:8] := SaturateU8(a[31:16]) -dst[23:16] := SaturateU8(a[47:32]) -dst[31:24] := SaturateU8(a[63:48]) -dst[39:32] := SaturateU8(a[79:64]) -dst[47:40] := SaturateU8(a[95:80]) -dst[55:48] := SaturateU8(a[111:96]) -dst[63:56] := SaturateU8(a[127:112]) -dst[71:64] := SaturateU8(b[15:0]) -dst[79:72] := SaturateU8(b[31:16]) -dst[87:80] := SaturateU8(b[47:32]) -dst[95:88] := SaturateU8(b[63:48]) -dst[103:96] := SaturateU8(b[79:64]) -dst[111:104] := SaturateU8(b[95:80]) -dst[119:112] := SaturateU8(b[111:96]) -dst[127:120] := SaturateU8(b[127:112]) -dst[135:128] := SaturateU8(a[143:128]) -dst[143:136] := SaturateU8(a[159:144]) -dst[151:144] := SaturateU8(a[175:160]) -dst[159:152] := SaturateU8(a[191:176]) -dst[167:160] := SaturateU8(a[207:192]) -dst[175:168] := SaturateU8(a[223:208]) -dst[183:176] := SaturateU8(a[239:224]) -dst[191:184] := SaturateU8(a[255:240]) -dst[199:192] := SaturateU8(b[143:128]) -dst[207:200] := SaturateU8(b[159:144]) -dst[215:208] := SaturateU8(b[175:160]) -dst[223:216] := SaturateU8(b[191:176]) -dst[231:224] := SaturateU8(b[207:192]) -dst[239:232] := SaturateU8(b[223:208]) -dst[247:240] := SaturateU8(b[239:224]) -dst[255:248] := SaturateU8(b[255:240]) -dst[263:256] := SaturateU8(a[271:256]) -dst[271:264] := SaturateU8(a[287:272]) -dst[279:272] := SaturateU8(a[303:288]) -dst[287:280] := SaturateU8(a[319:304]) -dst[295:288] := SaturateU8(a[335:320]) -dst[303:296] := SaturateU8(a[351:336]) -dst[311:304] := SaturateU8(a[367:352]) -dst[319:312] := SaturateU8(a[383:368]) -dst[327:320] := SaturateU8(b[271:256]) -dst[335:328] := SaturateU8(b[287:272]) -dst[343:336] := SaturateU8(b[303:288]) -dst[351:344] := SaturateU8(b[319:304]) -dst[359:352] := SaturateU8(b[335:320]) -dst[367:360] := SaturateU8(b[351:336]) -dst[375:368] := SaturateU8(b[367:352]) -dst[383:376] := SaturateU8(b[383:368]) -dst[391:384] := SaturateU8(a[399:384]) -dst[399:392] := SaturateU8(a[415:400]) -dst[407:400] := SaturateU8(a[431:416]) -dst[415:408] := SaturateU8(a[447:432]) -dst[423:416] := SaturateU8(a[463:448]) -dst[431:424] := SaturateU8(a[479:464]) -dst[439:432] := SaturateU8(a[495:480]) -dst[447:440] := SaturateU8(a[511:496]) -dst[455:448] := SaturateU8(b[399:384]) -dst[463:456] := SaturateU8(b[415:400]) -dst[471:464] := SaturateU8(b[431:416]) -dst[479:472] := SaturateU8(b[447:432]) -dst[487:480] := SaturateU8(b[463:448]) -dst[495:488] := SaturateU8(b[479:464]) -dst[503:496] := SaturateU8(b[495:480]) -dst[511:504] := SaturateU8(b[511:496]) -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
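Usage sketch for the two pack entries above. The list omits the intrinsic names, so the names below are inferred (presumably _mm512_packus_epi16 and _mm512_maskz_packus_epi16; compile with -mavx512bw). Note the interleaving in the pseudocode: each 128-bit lane packs eight words from "a" followed by eight words from "b".

#include <immintrin.h>

/* Words below 0 saturate to 0, words above 255 saturate to 255. */
__m512i pack_words_to_bytes(__m512i a, __m512i b) {
    return _mm512_packus_epi16(a, b);
}

/* Zeromasked form: bytes whose mask bit is clear come out as 0. */
__m512i pack_words_to_bytes_maskz(__mmask64 keep, __m512i a, __m512i b) {
    return _mm512_maskz_packus_epi16(keep, a, b);
}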
- - - - Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". - -FOR j := 0 to 31 - i := 16*j - l := 8*j - dst[l+7:l] := Saturate8(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - - - - - Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := 16*j - l := 8*j - IF k[j] - dst[l+7:l] := Saturate8(a[i+15:i]) - ELSE - dst[l+7:l] := src[l+7:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Store - - - - - Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 31 - i := 16*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i]) - FI -ENDFOR - - - AVX512BW -
immintrin.h
- Convert -
- - - - - Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := 16*j - l := 8*j - IF k[j] - dst[l+7:l] := Saturate8(a[i+15:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW -
immintrin.h
- Convert -
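A sketch of the saturating 16-to-8 narrowing family above (names inferred as _mm512_cvtsepi16_epi8 and _mm512_mask_cvtsepi16_storeu_epi8, since the list does not show them):

#include <immintrin.h>
#include <stdint.h>

/* 32 x i16 -> 32 x i8, each value clamped to [-128, 127]. */
__m256i narrow_saturating(__m512i words) {
    return _mm512_cvtsepi16_epi8(words);
}

/* Masked store variant: only bytes whose mask bit is set are written;
 * the destination may be unaligned. */
void narrow_store_selected(int8_t *dst, __mmask32 keep, __m512i words) {
    _mm512_mask_cvtsepi16_storeu_epi8(dst, keep, words);
}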
- - - - Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - l := j*16 - dst[l+15:l] := SignExtend16(a[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - - - - - Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - l := j*16 - IF k[j] - dst[l+15:l] := SignExtend16(a[i+7:i]) - ELSE - dst[l+15:l] := src[l+15:l] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - - - - Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - l := j*16 - IF k[j] - dst[l+15:l] := SignExtend16(a[i+7:i]) - ELSE - dst[l+15:l] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
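Sketch of the 8-to-16 sign extension above (presumably _mm512_cvtepi8_epi16 and _mm512_mask_cvtepi8_epi16; the zero-extending twin, _mm512_cvtepu8_epi16, appears later in the list):

#include <immintrin.h>

/* The 32 bytes of a __m256i widen to 32 sign-extended words. */
__m512i widen_signed(__m256i bytes) {
    return _mm512_cvtepi8_epi16(bytes);
}

/* Writemask form: lanes with a clear mask bit are copied from src. */
__m512i widen_signed_blend(__m512i src, __mmask32 k, __m256i bytes) {
    return _mm512_mask_cvtepi8_epi16(src, k, bytes);
}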
- - - - Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst". - -FOR j := 0 to 31 - i := 16*j - l := 8*j - dst[l+7:l] := SaturateU8(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := 16*j - l := 8*j - IF k[j] - dst[l+7:l] := SaturateU8(a[i+15:i]) - ELSE - dst[l+7:l] := src[l+7:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Store - - - - - Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 31 - i := 16*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i]) - FI -ENDFOR - - - AVX512BW -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := 16*j - l := 8*j - IF k[j] - dst[l+7:l] := SaturateU8(a[i+15:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - - - Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 31 - i := 16*j - l := 8*j - dst[l+7:l] := Truncate8(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - - - - - Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := 16*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+15:i]) - ELSE - dst[l+7:l] := src[l+7:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 31 - i := 16*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i]) - FI -ENDFOR - - - AVX512BW -
immintrin.h
- Convert -
- - - - - Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := 16*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+15:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512BW -
immintrin.h
- Convert -
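The entries above narrow 16-bit elements in two different ways: unsigned saturation clamps to [0, 255], truncation simply drops the high byte. A sketch of the difference (assuming the names _mm512_cvtusepi16_epi8 and _mm512_cvtepi16_epi8):

#include <immintrin.h>

/* 0x0180 (384) -> 0xFF under unsigned saturation. */
__m256i narrow_clamped(__m512i words) {
    return _mm512_cvtusepi16_epi8(words);
}

/* 0x0180 (384) -> 0x80 under truncation: only the low 8 bits survive. */
__m256i narrow_truncated(__m512i words) {
    return _mm512_cvtepi16_epi8(words);
}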
- - - - Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - l := j*16 - dst[l+15:l] := ZeroExtend16(a[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - - - - - Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - l := j*16 - IF k[j] - dst[l+15:l] := ZeroExtend16(a[i+7:i]) - ELSE - dst[l+15:l] := src[l+15:l] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - - - - Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - l := j*16 - IF k[j] - dst[l+15:l] := ZeroExtend16(a[i+7:i]) - ELSE - dst[l+15:l] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Convert -
- - - - - - Broadcast 8-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[7:0] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Set -
- - - - - Broadcast 8-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[7:0] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Set -
- - - - - - Broadcast 16-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[15:0] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Set -
- - - - - Broadcast 16-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[15:0] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Set -
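Sketch of the masked broadcasts above (names inferred as _mm512_mask_set1_epi8 and _mm512_maskz_set1_epi16):

#include <immintrin.h>

/* Lanes with a clear mask bit keep their old value from src. */
__m512i splat_byte_blend(__m512i src, __mmask64 k, char v) {
    return _mm512_mask_set1_epi8(src, k, v);
}

/* Lanes with a clear mask bit become zero. */
__m512i splat_word_or_zero(__mmask32 k, short v) {
    return _mm512_maskz_set1_epi16(k, v);
}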
- - - - - - Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
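Unlike the SSE/AVX2 compares, the entries above return one bit per element in a mask register rather than a 0/-1 byte vector. A sketch (assuming the names _mm512_cmpgt_epi8_mask and the generic _mm512_cmp_epi8_mask):

#include <immintrin.h>

__mmask64 bytes_greater(__m512i a, __m512i b) {
    return _mm512_cmpgt_epi8_mask(a, b);
}

/* Same predicate via the imm8 form: per the table above,
 * _MM_CMPINT_NLE (6) is signed greater-than. */
__mmask64 bytes_greater_generic(__m512i a, __m512i b) {
    return _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_NLE);
}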
- - - - - - - Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
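The masked compare forms above AND the comparison result with an incoming mask "k1" in a single instruction, which chains predicates cheaply. Sketch (assuming _mm512_mask_cmplt_epi8_mask):

#include <immintrin.h>

/* Bit j is set iff prior[j] was set AND a[j] < b[j] (signed). */
__mmask64 still_less(__mmask64 prior, __m512i a, __m512i b) {
    return _mm512_mask_cmplt_epi8_mask(prior, a, b);
}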
- - - - - - Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 63 - i := j*8 - k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
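The epu8 entries above compare as unsigned, so byte 0x80 (128) is larger than 0x01, whereas the epi8 forms treat 0x80 as -128. Sketch of the pair (names inferred):

#include <immintrin.h>

__mmask64 gt_unsigned(__m512i a, __m512i b) { return _mm512_cmpgt_epu8_mask(a, b); }
__mmask64 gt_signed(__m512i a, __m512i b)   { return _mm512_cmpgt_epi8_mask(a, b); }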
- - - - - - - Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] >= b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] > b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] <= b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] < b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - - Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 31 - i := j*16 - k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - - Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] >= b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] > b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] <= b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] < b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. - -FOR j := 0 to 63 - i := j*8 - k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. - -FOR j := 0 to 31 - i := j*16 - k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. - -FOR j := 0 to 63 - i := j*8 - IF k1[j] - k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compute the bitwise NAND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. - -FOR j := 0 to 63 - i := j*8 - k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0 -ENDFOR -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - - Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero. - -FOR j := 0 to 31 - i := j*16 - IF k1[j] - k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
- - - - - Compute the bitwise NAND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. - -FOR j := 0 to 31 - i := j*16 - k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Compare -
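Per the pseudocode above, test sets a mask bit where "a" AND "b" is non-zero and testn where it is zero, so the unmasked forms are exact complements. Sketch (assuming _mm512_test_epi8_mask and _mm512_testn_epi8_mask):

#include <immintrin.h>

/* Which bytes of data have any of the bits in flags set? */
__mmask64 bytes_with_flags(__m512i data, __m512i flags) {
    return _mm512_test_epi8_mask(data, flags);
}

/* ...and which have none of them. */
__mmask64 bytes_without_flags(__m512i data, __m512i flags) {
    return _mm512_testn_epi8_mask(data, flags);
}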
- - - - - Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". - -tmp := imm8[7:0] -IF tmp > 15 - tmp := 16 -FI -dst[127:0] := a[127:0] << (tmp*8) -dst[255:128] := a[255:128] << (tmp*8) -dst[383:256] := a[383:256] << (tmp*8) -dst[511:384] := a[511:384] << (tmp*8) -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
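Three left-shift flavors appear above: a per-element count vector (sllv), one count taken from the low 64 bits of a vector (sll), and an immediate (slli); counts of 16 or more zero the element. A sketch, assuming these are the usual _mm512_*_epi16 spellings:

#include <immintrin.h>

__m512i shl_per_element(__m512i a, __m512i counts) {
    return _mm512_sllv_epi16(a, counts);   /* independent count per word */
}

__m512i shl_uniform(__m512i a, __m128i count) {
    return _mm512_sll_epi16(a, count);     /* count[63:0] applies to all */
}

__m512i shl_by_3(__m512i a) {
    return _mm512_slli_epi16(a, 3);        /* imm8 must be a constant */
}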
- - - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - IF count[i+15:i] < 16 - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
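Unlike the logical shifts, the arithmetic shifts above replicate the sign bit, and an out-of-range count yields 0x0000 or 0xFFFF depending on the sign. Sketch (assuming _mm512_srai_epi16):

#include <immintrin.h>

/* Shifting right by 2 divides by 4, rounding toward negative infinity. */
__m512i div4_signed(__m512i a) {
    return _mm512_srai_epi16(a, 2);
}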
- - - - - Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". - -tmp := imm8[7:0] -IF tmp > 15 - tmp := 16 -FI -dst[127:0] := a[127:0] >> (tmp*8) -dst[255:128] := a[255:128] >> (tmp*8) -dst[383:256] := a[383:256] >> (tmp*8) -dst[511:384] := a[511:384] >> (tmp*8) -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - IF count[i+15:i] < 16 - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 31 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512BW -
immintrin.h
- Shift -
- - - - - Add 32-bit masks in "a" and "b", and store the result in "k". - -k[31:0] := a[31:0] + b[31:0] -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Add 64-bit masks in "a" and "b", and store the result in "k". - -k[63:0] := a[63:0] + b[63:0] -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise AND of 32-bit masks "a" and "b", and store the result in "k". - -k[31:0] := a[31:0] AND b[31:0] -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise AND of 64-bit masks "a" and "b", and store the result in "k". - -k[63:0] := a[63:0] AND b[63:0] -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise NOT of 32-bit mask "a" and then AND with "b", and store the result in "k". - -k[31:0] := (NOT a[31:0]) AND b[31:0] -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise NOT of 64-bit mask "a" and then AND with "b", and store the result in "k". - -k[63:0] := (NOT a[63:0]) AND b[63:0] -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - Compute the bitwise NOT of 32-bit mask "a", and store the result in "k". - -k[31:0] := NOT a[31:0] -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - Compute the bitwise NOT of 64-bit mask "a", and store the result in "k". - -k[63:0] := NOT a[63:0] -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 32-bit masks "a" and "b", and store the result in "k". - -k[31:0] := a[31:0] OR b[31:0] -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 64-bit masks "a" and "b", and store the result in "k". - -k[63:0] := a[63:0] OR b[63:0] -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise XNOR of 32-bit masks "a" and "b", and store the result in "k". - -k[31:0] := NOT (a[31:0] XOR b[31:0]) -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise XNOR of 64-bit masks "a" and "b", and store the result in "k". - -k[63:0] := NOT (a[63:0] XOR b[63:0]) -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise XOR of 32-bit masks "a" and "b", and store the result in "k". - -k[31:0] := a[31:0] XOR b[31:0] -k[MAX:32] := 0 - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise XOR of 64-bit masks "a" and "b", and store the result in "k". - -k[63:0] := a[63:0] XOR b[63:0] -k[MAX:64] := 0 - - - AVX512BW -
immintrin.h
- Mask -
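The mask-register operations above combine predicates without a round trip through vector or general-purpose registers. Sketch (assuming the _k*_mask64 spellings):

#include <immintrin.h>

/* a AND (NOT b); note _kandn_mask64 negates its FIRST operand. */
__mmask64 in_a_not_b(__mmask64 a, __mmask64 b) {
    return _kandn_mask64(b, a);
}

/* Bits where the two predicates disagree. */
__mmask64 differ(__mmask64 a, __mmask64 b) {
    return _kxor_mask64(a, b);
}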
- - - - - Shift the bits of 32-bit mask "a" left by "count" while shifting in zeros, and store the least significant 32 bits of the result in "k". - -k[MAX:0] := 0 -IF count[7:0] <= 31 - k[31:0] := a[31:0] << count[7:0] -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Shift the bits of 64-bit mask "a" left by "count" while shifting in zeros, and store the least significant 64 bits of the result in "k". - -k[MAX:0] := 0 -IF count[7:0] <= 63 - k[63:0] := a[63:0] << count[7:0] -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Shift the bits of 32-bit mask "a" right by "count" while shifting in zeros, and store the least significant 32 bits of the result in "k". - -k[MAX:0] := 0 -IF count[7:0] <= 31 - k[31:0] := a[31:0] >> count[7:0] -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Shift the bits of 64-bit mask "a" right by "count" while shifting in zeros, and store the least significant 64 bits of the result in "k". - -k[MAX:0] := 0 -IF count[7:0] <= 63 - k[63:0] := a[63:0] >> count[7:0] -FI - - - AVX512BW -
immintrin.h
- Mask -
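Sketch of the mask shifts above (assuming _kshiftli_mask64); per the pseudocode, a count past the mask width clears everything rather than wrapping:

#include <immintrin.h>

__mmask64 shift_mask_left4(__mmask64 m) {
    return _kshiftli_mask64(m, 4);   /* count is an immediate; result is 0 if count > 63 */
}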
- - - - - - Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". - -tmp[31:0] := a[31:0] OR b[31:0] -IF tmp[31:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI -IF tmp[31:0] == 0xFFFFFFFF - MEM[all_ones+7:all_ones] := 1 -ELSE - MEM[all_ones+7:all_ones] := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[31:0] := a[31:0] OR b[31:0] -IF tmp[31:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". - -tmp[31:0] := a[31:0] OR b[31:0] -IF tmp[31:0] == 0xFFFFFFFF - dst := 1 -ELSE - dst := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - - Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". - -tmp[63:0] := a[63:0] OR b[63:0] -IF tmp[63:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI -IF tmp[63:0] == 0xFFFFFFFFFFFFFFFF - MEM[all_ones+7:all_ones] := 1 -ELSE - MEM[all_ones+7:all_ones] := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[63:0] := a[63:0] OR b[63:0] -IF tmp[63:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". - -tmp[63:0] := a[63:0] OR b[63:0] -IF tmp[63:0] == 0xFFFFFFFFFFFFFFFF - dst := 1 -ELSE - dst := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
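kortest ORs two masks and reports all-zeros and all-ones in one step, which is convenient for loop exits. Sketch (assuming _kortestz_mask64_u8):

#include <immintrin.h>

/* _kortestz_* returns 1 when (lo OR hi) == 0, so invert for "any set". */
int any_match(__mmask64 lo, __mmask64 hi) {
    return !_kortestz_mask64_u8(lo, hi);
}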
- - - - - - Compute the bitwise AND of 32-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". - -tmp1[31:0] := a[31:0] AND b[31:0] -IF tmp1[31:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI -tmp2[31:0] := (NOT a[31:0]) AND b[31:0] -IF tmp2[31:0] == 0x0 - MEM[and_not+7:and_not] := 1 -ELSE - MEM[and_not+7:and_not] := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise AND of 32-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[31:0] := a[31:0] AND b[31:0] -IF tmp[31:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise NOT of 32-bit mask "a" and then AND with "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[31:0] := (NOT a[31:0]) AND b[31:0] -IF tmp[31:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - - Compute the bitwise AND of 64-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". - -tmp1[63:0] := a[63:0] AND b[63:0] -IF tmp1[63:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI -tmp2[63:0] := (NOT a[63:0]) AND b[63:0] -IF tmp2[63:0] == 0x0 - MEM[and_not+7:and_not] := 1 -ELSE - MEM[and_not+7:and_not] := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise AND of 64-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[63:0] := a[63:0] AND b[63:0] -IF tmp[63:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - - Compute the bitwise NOT of 64-bit mask "a" and then AND with "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[63:0] := (NOT a[63:0]) AND b[63:0] -IF tmp[63:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512BW -
immintrin.h
- Mask -
- - - - Convert 32-bit mask "a" into an integer value, and store the result in "dst". - -dst := ZeroExtend32(a[31:0]) - - - AVX512BW -
immintrin.h
- Mask -
- - - - Convert 64-bit mask "a" into an integer value, and store the result in "dst". - -dst := ZeroExtend64(a[63:0]) - - - AVX512BW -
immintrin.h
- Mask -
- - - - Convert integer value "a" into a 32-bit mask, and store the result in "k". - -k := ZeroExtend32(a[31:0]) - - - AVX512BW -
immintrin.h
- Mask -
- - - - Convert integer value "a" into a 64-bit mask, and store the result in "k". - -k := ZeroExtend64(a[63:0]) - - - AVX512BW -
immintrin.h
- Mask -
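Sketch of the mask/integer conversions above (assuming _cvtmask64_u64 and _cvtu64_mask64), e.g. to hand a mask to ordinary bit-twiddling code:

#include <immintrin.h>
#include <stdint.h>

uint64_t mask_to_bits(__mmask64 k)  { return _cvtmask64_u64(k); }
__mmask64 bits_to_mask(uint64_t v)  { return _cvtu64_mask64(v); }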
- - - - - - Broadcast the low 8 bits from input mask "k" to all 64-bit elements of "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ZeroExtend64(k[7:0]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Broadcast the low 8 bits from input mask "k" to all 64-bit elements of "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ZeroExtend64(k[7:0]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Broadcast the low 16 bits from input mask "k" to all 32-bit elements of "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ZeroExtend32(k[15:0]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Broadcast the low 16 bits from input mask "k" to all 32-bit elements of "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ZeroExtend32(k[15:0]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Miscellaneous -
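Sketch of the mask broadcasts above (assuming _mm256_broadcastmw_epi32, AVX512CD + AVX512VL), which materialize a mask register as a vector of repeated integer copies:

#include <immintrin.h>

/* Every 32-bit lane receives the zero-extended low 16 bits of k. */
__m256i splat_mask16(__mmask16 k) {
    return _mm256_broadcastmw_epi32(k);
}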
- - - - Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 7 - i := j*32 - FOR l := 0 to j-1 - m := l*32 - dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 - ENDFOR - dst[i+31:i+j] := 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - - - Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 7 - i := j*32 - IF k[j] - FOR l := 0 to j-1 - m := l*32 - dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 - ENDFOR - dst[i+31:i+j] := 0 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - - Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 7 - i := j*32 - IF k[j] - FOR l := 0 to j-1 - m := l*32 - dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 - ENDFOR - dst[i+31:i+j] := 0 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 3 - i := j*32 - FOR l := 0 to j-1 - m := l*32 - dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 - ENDFOR - dst[i+31:i+j] := 0 -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - - - Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 3 - i := j*32 - IF k[j] - FOR l := 0 to j-1 - m := l*32 - dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 - ENDFOR - dst[i+31:i+j] := 0 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - - Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 3 - i := j*32 - IF k[j] - FOR l := 0 to j-1 - m := l*32 - dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 - ENDFOR - dst[i+31:i+j] := 0 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 3 - i := j*64 - FOR l := 0 to j-1 - m := l*64 - dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 - ENDFOR - dst[i+63:i+j] := 0 -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - - - Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 3 - i := j*64 - IF k[j] - FOR l := 0 to j-1 - m := l*64 - dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 - ENDFOR - dst[i+63:i+j] := 0 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - - Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 3 - i := j*64 - IF k[j] - FOR l := 0 to j-1 - m := l*64 - dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 - ENDFOR - dst[i+63:i+j] := 0 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 1 - i := j*64 - FOR k := 0 to j-1 - m := k*64 - dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 - ENDFOR - dst[i+63:i+j] := 0 -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - - - Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 1 - i := j*64 - IF k[j] - FOR l := 0 to j-1 - m := l*64 - dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 - ENDFOR - dst[i+63:i+j] := 0 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - - Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 1 - i := j*64 - IF k[j] - FOR l := 0 to j-1 - m := l*64 - dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 - ENDFOR - dst[i+63:i+j] := 0 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL -
immintrin.h
- Compare -
- - - - Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - tmp := 31 - dst[i+31:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+31:i] := dst[i+31:i] + 1 - OD -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
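Assuming the name `_mm256_lzcnt_epi32` for the entry above, leading-zero counts give a branch-free per-lane floor(log2(x)) for nonzero inputs:

#include <immintrin.h>
#include <stdio.h>

/* Sketch: floor(log2(x)) == 31 - lzcnt(x) for x != 0; a zero lane
   would produce 31 - 32 = -1, a convenient sentinel. */
int main(void) {
    __m256i x     = _mm256_setr_epi32(1, 2, 3, 8, 255, 256, 1 << 30, 7);
    __m256i lz    = _mm256_lzcnt_epi32(x);
    __m256i log2x = _mm256_sub_epi32(_mm256_set1_epi32(31), lz);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, log2x);
    for (int j = 0; j < 8; j++) printf("%d ", out[j]);   /* 0 1 1 3 7 8 30 2 */
    printf("\n");
    return 0;
}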
- - - - - - Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - tmp := 31 - dst[i+31:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+31:i] := dst[i+31:i] + 1 - OD - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - - Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - tmp := 31 - dst[i+31:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+31:i] := dst[i+31:i] + 1 - OD - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - tmp := 31 - dst[i+31:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+31:i] := dst[i+31:i] + 1 - OD -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - tmp := 31 - dst[i+31:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+31:i] := dst[i+31:i] + 1 - OD - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - - Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - tmp := 31 - dst[i+31:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+31:i] := dst[i+31:i] + 1 - OD - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - tmp := 63 - dst[i+63:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+63:i] := dst[i+63:i] + 1 - OD -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp := 63 - dst[i+63:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+63:i] := dst[i+63:i] + 1 - OD - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - - Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp := 63 - dst[i+63:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+63:i] := dst[i+63:i] + 1 - OD - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - tmp := 63 - dst[i+63:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+63:i] := dst[i+63:i] + 1 - OD -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp := 63 - dst[i+63:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+63:i] := dst[i+63:i] + 1 - OD - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - - Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp := 63 - dst[i+63:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+63:i] := dst[i+63:i] + 1 - OD - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512CD - AVX512VL - 
immintrin.h
- Bit Manipulation -
- - - - - - Broadcast the low 8 bits from input mask "k" to all 64-bit elements of "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ZeroExtend64(k[7:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD - 
immintrin.h
- Swizzle -
- - - - Broadcast the low 16 bits from input mask "k" to all 32-bit elements of "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ZeroExtend32(k[15:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD - 
immintrin.h
- Swizzle -
- - - - Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 15 - i := j*32 - FOR k := 0 to j-1 - m := k*32 - dst[i+k] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 - ENDFOR - dst[i+31:i+j] := 0 -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD -
immintrin.h
- Compare -
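The 512-bit form (presumably `_mm512_conflict_epi32`; the name is stripped from this dump) makes a cheap all-lanes-distinct test:

#include <immintrin.h>
#include <stdio.h>

/* Sketch: the conflict result is all zero iff the 16 indices are
   pairwise distinct (requires AVX512CD). */
int main(void) {
    __m512i idx  = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
                                     8, 9, 10, 11, 12, 13, 14, 15);
    __m512i conf = _mm512_conflict_epi32(idx);
    __mmask16 dup = _mm512_test_epi32_mask(conf, conf);
    puts(dup ? "has duplicates" : "all unique");
    return 0;
}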
- - - - - - Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 15 - i := j*32 - IF k[j] - FOR l := 0 to j-1 - m := l*32 - dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 - ENDFOR - dst[i+31:i+j] := 0 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD -
immintrin.h
- Compare -
- - - - - Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 15 - i := j*32 - IF k[j] - FOR l := 0 to j-1 - m := l*32 - dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 - ENDFOR - dst[i+31:i+j] := 0 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD -
immintrin.h
- Compare -
- - - - Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 7 - i := j*64 - FOR k := 0 to j-1 - m := k*64 - dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 - ENDFOR - dst[i+63:i+j] := 0 -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD -
immintrin.h
- Compare -
- - - - - - Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 7 - i := j*64 - IF k[j] - FOR l := 0 to j-1 - m := l*64 - dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 - ENDFOR - dst[i+63:i+j] := 0 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD -
immintrin.h
- Compare -
- - - - - Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst". - -FOR j := 0 to 7 - i := j*64 - IF k[j] - FOR l := 0 to j-1 - m := l*64 - dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 - ENDFOR - dst[i+63:i+j] := 0 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD -
immintrin.h
- Compare -
- - - - Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - tmp := 31 - dst[i+31:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+31:i] := dst[i+31:i] + 1 - OD -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD - 
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp := 31 - dst[i+31:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+31:i] := dst[i+31:i] + 1 - OD - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD - 
immintrin.h
- Bit Manipulation -
- - - - - Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp := 31 - dst[i+31:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+31:i] := dst[i+31:i] + 1 - OD - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD - 
immintrin.h
- Bit Manipulation -
- - - - Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - tmp := 63 - dst[i+63:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+63:i] := dst[i+63:i] + 1 - OD -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD - 
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp := 63 - dst[i+63:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+63:i] := dst[i+63:i] + 1 - OD - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD - 
immintrin.h
- Bit Manipulation -
- - - - - Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp := 63 - dst[i+63:i] := 0 - DO WHILE (tmp >= 0 AND a[i+tmp] == 0) - tmp := tmp - 1 - dst[i+63:i] := dst[i+63:i] + 1 - OD - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512CD - 
immintrin.h
- Bit Manipulation -
- - - - - - - - - Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
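Assuming `_mm256_mask_andnot_pd` for the masked form above, ANDNOT against a sign-bit mask gives a per-lane conditional absolute value:

#include <immintrin.h>
#include <stdio.h>

/* Sketch: (NOT -0.0) AND x clears bit 63, i.e. |x|; the writemask
   limits that to lanes 0 and 2, the other lanes keep their old value. */
int main(void) {
    __m256d x = _mm256_setr_pd(-1.5, 2.0, -3.25, -4.0);
    __m256d r = _mm256_mask_andnot_pd(x, 0x5, _mm256_set1_pd(-0.0), x);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1.5 2 3.25 -4 */
    return 0;
}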
- - - - - - Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Logical -
- - - - Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*32 - n := (j % 2)*32 - dst[i+31:i] := a[n+31:n] -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
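Assuming the entry above is `_mm256_broadcast_f32x2`, repeating a two-element pair is a common setup for interleaved complex arithmetic:

#include <immintrin.h>
#include <stdio.h>

/* Sketch: the low (re, im) pair of the source is copied to every
   64-bit position of the 256-bit destination. */
int main(void) {
    __m128 pair = _mm_setr_ps(1.0f, 2.0f, 0.0f, 0.0f);
    __m256 r = _mm256_broadcast_f32x2(pair);   /* 1 2 1 2 1 2 1 2 */
    float out[8];
    _mm256_storeu_ps(out, r);
    for (int j = 0; j < 8; j++) printf("%g ", out[j]);
    printf("\n");
    return 0;
}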
- - - - - - Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - n := (j % 2)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - n := (j % 2)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst". - -FOR j := 0 to 3 - i := j*64 - n := (j % 2)*64 - dst[i+63:i] := a[n+63:n] -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - n := (j % 2)*64 - IF k[j] - dst[i+63:i] := a[n+63:n] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - n := (j % 2)*64 - IF k[j] - dst[i+63:i] := a[n+63:n] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*32 - n := (j % 2)*32 - dst[i+31:i] := a[n+31:n] -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL - 
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - n := (j % 2)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - n := (j % 2)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst". - -FOR j := 0 to 3 - i := j*32 - n := (j % 2)*32 - dst[i+31:i] := a[n+31:n] -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL - 
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - n := (j % 2)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - n := (j % 2)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst". - -FOR j := 0 to 3 - i := j*64 - n := (j % 2)*64 - dst[i+63:i] := a[n+63:n] -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - n := (j % 2)*64 - IF k[j] - dst[i+63:i] := a[n+63:n] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - n := (j % 2)*64 - IF k[j] - dst[i+63:i] := a[n+63:n] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". - -CASE imm8[0] OF -0: dst[127:0] := a[127:0] -1: dst[127:0] := a[255:128] -ESAC -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
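Assuming `_mm256_extractf64x2_pd`; note that imm8 must be a compile-time constant, selecting the low (0) or high (1) 128-bit half:

#include <immintrin.h>
#include <stdio.h>

/* Sketch: pull elements 2..3 (the upper 128 bits) out of a 256-bit
   double vector. */
int main(void) {
    __m256d a  = _mm256_setr_pd(10.0, 11.0, 12.0, 13.0);
    __m128d hi = _mm256_extractf64x2_pd(a, 1);   /* {12, 13} */
    double out[2];
    _mm_storeu_pd(out, hi);
    printf("%g %g\n", out[0], out[1]);
    return 0;
}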
- - - - - - - Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -CASE imm8[0] OF -0: tmp[127:0] := a[127:0] -1: tmp[127:0] := a[255:128] -ESAC -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -CASE imm8[0] OF -0: tmp[127:0] := a[127:0] -1: tmp[127:0] := a[255:128] -ESAC -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst". - -CASE imm8[0] OF -0: dst[127:0] := a[127:0] -1: dst[127:0] := a[255:128] -ESAC -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -CASE imm8[0] OF -0: tmp[127:0] := a[127:0] -1: tmp[127:0] := a[255:128] -ESAC -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -CASE imm8[0] OF -0: tmp[127:0] := a[127:0] -1: tmp[127:0] := a[255:128] -ESAC -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". - [fpclass_note] - FOR j := 0 to 3 - i := j*64 - k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) -ENDFOR -k[MAX:4] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
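Assuming `_mm256_fpclass_pd_mask`; the category bits used below (0x01 QNaN, 0x80 SNaN, 0x08 +Inf, 0x10 -Inf) come from the guide's [fpclass_note], which this dump elides, so treat them as quoted from elsewhere rather than from this entry:

#include <immintrin.h>
#include <math.h>
#include <stdio.h>

/* Sketch: one instruction flags which lanes are NaN or infinite,
   without raising exceptions on the NaN inputs. */
int main(void) {
    __m256d v = _mm256_setr_pd(1.0, NAN, -INFINITY, -0.0);
    __mmask8 m = _mm256_fpclass_pd_mask(v, 0x01 | 0x80 | 0x08 | 0x10);
    printf("NaN/Inf lanes: 0x%02x\n", (unsigned)m);   /* prints 0x06 */
    return 0;
}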
- - - - - - Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - [fpclass_note] - FOR j := 0 to 3 - i := j*64 - IF k1[j] - k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". - [fpclass_note] - FOR j := 0 to 1 - i := j*64 - k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) -ENDFOR -k[MAX:2] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - [fpclass_note] - FOR j := 0 to 1 - i := j*64 - IF k1[j] - k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:2] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". - [fpclass_note] - FOR j := 0 to 7 - i := j*32 - k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) -ENDFOR -k[MAX:8] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - [fpclass_note] - FOR j := 0 to 7 - i := j*32 - IF k1[j] - k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". - [fpclass_note] - FOR j := 0 to 3 - i := j*32 - k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) -ENDFOR -k[MAX:4] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - [fpclass_note] - FOR j := 0 to 3 - i := j*32 - IF k1[j] - k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". - -dst[255:0] := a[255:0] -CASE imm8[0] OF -0: dst[127:0] := b[127:0] -1: dst[255:128] := b[127:0] -ESAC -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
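Assuming `_mm256_insertf64x2`, the mirror image of the extract above:

#include <immintrin.h>
#include <stdio.h>

/* Sketch: overwrite the upper 128-bit half of a 256-bit double vector
   with a 128-bit pair; imm8 = 0 would replace the lower half instead. */
int main(void) {
    __m256d a = _mm256_set1_pd(0.0);
    __m128d b = _mm_setr_pd(8.0, 9.0);
    __m256d r = _mm256_insertf64x2(a, b, 1);     /* {0, 0, 8, 9} */
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
    return 0;
}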
- - - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[255:0] := a[255:0] -CASE (imm8[0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -ESAC -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[255:0] := a[255:0] -CASE (imm8[0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -ESAC -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Copy "a" to "dst", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8". - -dst[255:0] := a[255:0] -CASE imm8[0] OF -0: dst[127:0] := b[127:0] -1: dst[255:128] := b[127:0] -ESAC -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[255:0] := a[255:0] -CASE (imm8[0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -ESAC -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[255:0] := a[255:0] -CASE (imm8[0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -ESAC -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a". - -FOR j := 0 to 7 - i := j*32 - IF a[i+31] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
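Assuming `_mm256_movepi32_mask`, sign-bit extraction compresses eight lanes into one bitmask, e.g. "which lanes are negative":

#include <immintrin.h>
#include <stdio.h>

/* Sketch: bit j of the result is the sign (MSB) of lane j. */
int main(void) {
    __m256i v = _mm256_setr_epi32(-1, 2, -3, 4, 5, -6, 7, 8);
    __mmask8 neg = _mm256_movepi32_mask(v);
    printf("negative lanes: 0x%02x\n", (unsigned)neg);   /* prints 0x25 */
    return 0;
}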
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a". - -FOR j := 0 to 3 - i := j*32 - IF a[i+31] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := 0xFFFFFFFF - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
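Assuming `_mm256_movm_epi32`, this is the inverse direction: a bitmask expands into all-ones/all-zeros lanes, useful when a __mmask8 has to feed a non-masked blend or AND:

#include <immintrin.h>
#include <stdio.h>

/* Sketch: lane j becomes 0xFFFFFFFF if bit j of k is set, else 0. */
int main(void) {
    __m256i v = _mm256_movm_epi32((__mmask8)0xA5);   /* lanes 0,2,5,7 */
    unsigned out[8];
    _mm256_storeu_si256((__m256i *)out, v);
    for (int j = 0; j < 8; j++) printf("%08x ", out[j]);
    printf("\n");
    return 0;
}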
- - - - Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := 0xFFFFFFFF - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := 0xFFFFFFFFFFFFFFFF - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := 0xFFFFFFFFFFFFFFFF - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a". - -FOR j := 0 to 3 - i := j*64 - IF a[i+63] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a". - -FOR j := 0 to 1 - i := j*64 - IF a[i+63] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:2] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
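Assuming `_mm256_range_pd`, the imm8 encoding composes as (sign control << 2) | operation control; 0x0B selects absolute max with the sign bit cleared:

#include <immintrin.h>
#include <stdio.h>

/* Sketch: per-lane "larger magnitude, forced positive";
   imm8[1:0] = 11 (absolute max), imm8[3:2] = 10 (clear sign bit). */
int main(void) {
    __m256d a = _mm256_setr_pd(-5.0, 1.0, -2.0, 8.0);
    __m256d b = _mm256_setr_pd( 3.0, -4.0, 2.5, -9.0);
    __m256d r = _mm256_range_pd(a, b, 0x0B);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 5 4 2.5 9 */
    return 0;
}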
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL - 
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL - 
immintrin.h
- Miscellaneous -
- - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL - 
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
 imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
 imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.

DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
	CASE opCtl[1:0] OF
	0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0]
	1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0]
	2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
	3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
	ESAC

	CASE signSelCtl[1:0] OF
	0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0])
	1: dst[31:0] := tmp[31:0]
	2: dst[31:0] := (0 << 31) OR (tmp[30:0])
	3: dst[31:0] := (1 << 31) OR (tmp[30:0])
	ESAC

	RETURN dst
}
FOR j := 0 to 3
	i := j*32
	IF k[j]
		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
	ELSE
		dst[i+31:i] := src[i+31:i]
	FI
ENDFOR
dst[MAX:128] := 0


AVX512DQ
AVX512VL
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
 imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
 imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.

DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
	CASE opCtl[1:0] OF
	0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0]
	1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0]
	2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
	3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
	ESAC

	CASE signSelCtl[1:0] OF
	0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0])
	1: dst[31:0] := tmp[31:0]
	2: dst[31:0] := (0 << 31) OR (tmp[30:0])
	3: dst[31:0] := (1 << 31) OR (tmp[30:0])
	ESAC

	RETURN dst
}
FOR j := 0 to 3
	i := j*32
	IF k[j]
		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
	ELSE
		dst[i+31:i] := 0
	FI
ENDFOR
dst[MAX:128] := 0


AVX512DQ
AVX512VL
immintrin.h
- Miscellaneous -
- - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
 imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
 imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.

DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
	CASE opCtl[1:0] OF
	0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0]
	1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0]
	2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
	3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
	ESAC

	CASE signSelCtl[1:0] OF
	0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0])
	1: dst[31:0] := tmp[31:0]
	2: dst[31:0] := (0 << 31) OR (tmp[30:0])
	3: dst[31:0] := (1 << 31) OR (tmp[30:0])
	ESAC

	RETURN dst
}
FOR j := 0 to 3
	i := j*32
	dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
ENDFOR
dst[MAX:128] := 0


AVX512DQ
AVX512VL
immintrin.h
- Miscellaneous -
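Not part of the source data: a minimal C sketch of the RANGE operation described above, assuming the conventional immintrin.h name _mm_range_ps for the unmasked 128-bit variant and a compiler with AVX512DQ and AVX512VL enabled (e.g. -mavx512dq -mavx512vl). imm8 = 0x07 selects absolute max with the sign taken from the compare result, so each lane keeps whichever operand has the larger magnitude.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_setr_ps(-1.0f,  2.0f, -3.0f,  4.0f);
    __m128 b = _mm_setr_ps( 0.5f, -2.5f,  3.5f, -4.5f);
    __m128 r = _mm_range_ps(a, b, 0x07);   /* opCtl = 11, signSelCtl = 01 */
    float out[4];
    _mm_storeu_ps(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* -1 -2.5 3.5 -4.5 */
    return 0;
}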
- - - - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
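As an aid to reading the ReduceArgumentPD pseudocode, here is a scalar C model (an illustrative sketch, not from the source data) for rounding control 0; it approximates ROUND with nearbyint under the default round-to-nearest mode:

#include <math.h>

/* m = imm8[7:4] fraction bits survive the rounding step; the function
   returns the residual src - ROUND(src), as in the pseudocode above. */
static double reduce_argument_pd(double x, int imm8) {
    int m = (imm8 >> 4) & 0xF;
    double rounded = ldexp(nearbyint(ldexp(x, m)), -m); /* 2^-m * ROUND(2^m * x) */
    double r = x - rounded;
    return isinf(r) ? 0.0 : r;  /* mirrors the IsInf guard in the pseudocode */
}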
- - - - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note]

DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
	tmp[31:0] := src1[31:0] - tmp[31:0]
	IF IsInf(tmp[31:0])
		tmp[31:0] := FP32(0.0)
	FI
	RETURN tmp[31:0]
}
FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
ENDFOR
dst[MAX:256] := 0


AVX512DQ
AVX512VL
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Miscellaneous -
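One common use of this reduction: with zero fraction bits preserved and truncation as the rounding control, the result is the signed fractional part of each lane. A hedged sketch, assuming the conventional immintrin.h name _mm_reduce_ps and that imm8[3:0] = 3 encodes truncation per the [round_imm_note] referenced above:

#include <immintrin.h>

/* x - trunc(x) per lane: imm8[7:4] = 0 fraction bits, imm8[3:0] = 3 (truncate). */
static __m128 fractional_part_ps(__m128 x) {
    return _mm_reduce_ps(x, 0x03);
}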
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
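For illustration (not from the source data), the rounding used by this conversion follows MXCSR, which defaults to round-to-nearest-even; assuming the conventional immintrin.h name _mm256_cvtpd_epi64:

#include <immintrin.h>
#include <stdint.h>

void cvtpd_epi64_demo(int64_t out[4]) {
    __m256d a = _mm256_setr_pd(1.5, -2.5, 3.7, -4.2);
    __m256i r = _mm256_cvtpd_epi64(a);   /* nearest-even: 2, -2, 4, -4 */
    _mm256_storeu_si256((__m256i *)out, r);
}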
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
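Note the widening shape of this conversion: a 128-bit float vector produces a 256-bit integer vector. A brief sketch, assuming the conventional immintrin.h name _mm256_cvtps_epi64:

#include <immintrin.h>
#include <stdint.h>

void cvtps_epi64_demo(int64_t out[4]) {
    __m128  f = _mm_setr_ps(1.5f, -2.5f, 8.0f, -9.0f);
    __m256i q = _mm256_cvtps_epi64(f);   /* MXCSR rounding: 2, -2, 8, -9 */
    _mm256_storeu_si256((__m256i *)out, q);
}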
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
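A short sketch (assuming the conventional immintrin.h name _mm256_cvtepi64_pd): the conversion is exact for magnitudes up to 2^53, beyond which results round per MXCSR.

#include <immintrin.h>

void cvtepi64_pd_demo(double out[4]) {
    __m256i v = _mm256_setr_epi64x(1, -2, 1LL << 53, -3);
    __m256d d = _mm256_cvtepi64_pd(v);   /* 1.0, -2.0, 9007199254740992.0, -3.0 */
    _mm256_storeu_pd(out, d);
}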
- - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - l := j*32 - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := src[l+31:l] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - l := j*32 - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) -ENDFOR -dst[MAX:64] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := src[l+31:l] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
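Note the narrowing shape here: four 64-bit lanes become four 32-bit float lanes, so the 256-bit input yields a 128-bit result. A brief sketch, assuming the conventional immintrin.h name _mm256_cvtepi64_ps:

#include <immintrin.h>

void cvtepi64_ps_demo(float out[4]) {
    __m256i v = _mm256_setr_epi64x(1, 2, 3, 4);
    __m128  f = _mm256_cvtepi64_ps(v);   /* 1.0f, 2.0f, 3.0f, 4.0f */
    _mm_storeu_ps(out, f);
}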
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
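Unlike the MXCSR-directed conversions above, the truncating forms always round toward zero. A brief sketch, assuming the conventional immintrin.h name _mm256_cvttpd_epi64:

#include <immintrin.h>
#include <stdint.h>

void cvttpd_epi64_demo(int64_t out[4]) {
    __m256d a = _mm256_setr_pd(1.9, -1.9, 2.5, -2.5);
    __m256i t = _mm256_cvttpd_epi64(a);  /* truncate toward zero: 1, -1, 2, -2 */
    _mm256_storeu_si256((__m256i *)out, t);
}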
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".

FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
ENDFOR
dst[MAX:256] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 3
	i := j*64
	IF k[j]
		dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
	ELSE
		dst[i+63:i] := src[i+63:i]
	FI
ENDFOR
dst[MAX:256] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 3
	i := j*64
	IF k[j]
		dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
	ELSE
		dst[i+63:i] := 0
	FI
ENDFOR
dst[MAX:256] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".

FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
ENDFOR
dst[MAX:128] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 1
	i := j*64
	IF k[j]
		dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
	ELSE
		dst[i+63:i] := src[i+63:i]
	FI
ENDFOR
dst[MAX:128] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 1
	i := j*64
	IF k[j]
		dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i])
	ELSE
		dst[i+63:i] := 0
	FI
ENDFOR
dst[MAX:128] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
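The unsigned forms differ from the signed ones only in how the 64-bit pattern is interpreted, which matters for values with the top bit set. An illustrative sketch, assuming the conventional immintrin.h name _mm256_cvtepu64_pd:

#include <immintrin.h>

void cvtepu64_pd_demo(double out[4]) {
    __m256i u = _mm256_set1_epi64x(-1);  /* bit pattern = 2^64 - 1 unsigned */
    __m256d d = _mm256_cvtepu64_pd(u);   /* ~1.8446744e19 in every lane, not -1.0 */
    _mm256_storeu_pd(out, d);
}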
- - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".

FOR j := 0 to 3
	i := j*64
	l := j*32
	dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
ENDFOR
dst[MAX:128] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 3
	i := j*64
	l := j*32
	IF k[j]
		dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
	ELSE
		dst[l+31:l] := src[l+31:l]
	FI
ENDFOR
dst[MAX:128] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 3
	i := j*64
	l := j*32
	IF k[j]
		dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
	ELSE
		dst[l+31:l] := 0
	FI
ENDFOR
dst[MAX:128] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".

FOR j := 0 to 1
	i := j*64
	l := j*32
	dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
ENDFOR
dst[MAX:64] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

FOR j := 0 to 1
	i := j*64
	l := j*32
	IF k[j]
		dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
	ELSE
		dst[l+31:l] := src[l+31:l]
	FI
ENDFOR
dst[MAX:64] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 1
	i := j*64
	l := j*32
	IF k[j]
		dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i])
	ELSE
		dst[l+31:l] := 0
	FI
ENDFOR
dst[MAX:64] := 0


AVX512DQ
AVX512VL
immintrin.h
- Convert -
- - - - - - - Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp[127:0] := a[i+63:i] * b[i+63:i] - dst[i+63:i] := tmp[63:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp[127:0] := a[i+63:i] * b[i+63:i] - dst[i+63:i] := tmp[63:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst". - -FOR j := 0 to 3 - i := j*64 - tmp[127:0] := a[i+63:i] * b[i+63:i] - dst[i+63:i] := tmp[63:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp[127:0] := a[i+63:i] * b[i+63:i] - dst[i+63:i] := tmp[63:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp[127:0] := a[i+63:i] * b[i+63:i] - dst[i+63:i] := tmp[63:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst". - -FOR j := 0 to 1 - i := j*64 - tmp[127:0] := a[i+63:i] * b[i+63:i] - dst[i+63:i] := tmp[63:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ - AVX512VL -
immintrin.h
- Arithmetic -
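Because only the low 64 bits of each 128-bit product are kept, the multiplication wraps modulo 2^64. A brief sketch, assuming the conventional immintrin.h name _mm256_mullo_epi64:

#include <immintrin.h>
#include <stdint.h>

void mullo_epi64_demo(int64_t out[4]) {
    __m256i a = _mm256_setr_epi64x(3, -4, 1LL << 40, 7);
    __m256i b = _mm256_setr_epi64x(5,  6, 1LL << 30, -8);
    __m256i p = _mm256_mullo_epi64(a, b);  /* 15, -24, 0 (2^70 wraps), -56 */
    _mm256_storeu_si256((__m256i *)out, p);
}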
- - - - - - - Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
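A classic use of the floating-point ANDNOT: clearing the sign bit of every lane computes the absolute value. A minimal sketch, assuming the conventional immintrin.h name _mm512_andnot_pd:

#include <immintrin.h>

/* -0.0 has only the sign bit set, so (NOT mask) AND x clears each sign bit. */
static __m512d abs_pd(__m512d x) {
    return _mm512_andnot_pd(_mm512_set1_pd(-0.0), x);
}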
- - - - - Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] AND b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] OR b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] OR b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Logical -
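Similarly, XOR with a sign-bit mask flips the sign of every lane, giving a branch-free negation. A minimal sketch, assuming the conventional immintrin.h name _mm512_xor_ps:

#include <immintrin.h>

static __m512 negate_ps(__m512 x) {
    return _mm512_xor_ps(x, _mm512_set1_ps(-0.0f));  /* flip each sign bit */
}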
- - - - Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". - -FOR j := 0 to 15 - i := j*32 - n := (j % 2)*32 - dst[i+31:i] := a[n+31:n] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - n := (j % 2)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - n := (j % 2)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
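The x2 broadcast repeats a 64-bit pair across the destination, so the 512-bit result alternates the two source lanes. A brief sketch, assuming the conventional immintrin.h name _mm512_broadcast_f32x2:

#include <immintrin.h>

void broadcast_f32x2_demo(float out[16]) {
    __m128 lo = _mm_setr_ps(1.0f, 2.0f, 0.0f, 0.0f); /* only lanes 0 and 1 are used */
    __m512 r  = _mm512_broadcast_f32x2(lo);          /* 1,2,1,2,... across 16 lanes */
    _mm512_storeu_ps(out, r);
}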
- - - - Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst". - -FOR j := 0 to 15 - i := j*32 - n := (j % 8)*32 - dst[i+31:i] := a[n+31:n] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - n := (j % 8)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - n := (j % 8)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*64 - n := (j % 2)*64 - dst[i+63:i] := a[n+63:n] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - n := (j % 2)*64 - IF k[j] - dst[i+63:i] := a[n+63:n] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - n := (j % 2)*64 - IF k[j] - dst[i+63:i] := a[n+63:n] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst. - -FOR j := 0 to 15 - i := j*32 - n := (j % 2)*32 - dst[i+31:i] := a[n+31:n] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
- - - - Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst". - -FOR j := 0 to 15 - i := j*32 - n := (j % 2)*32 - dst[i+31:i] := a[n+31:n] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - n := (j % 2)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - n := (j % 2)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst". - -FOR j := 0 to 15 - i := j*32 - n := (j % 8)*32 - dst[i+31:i] := a[n+31:n] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - n := (j % 8)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - n := (j % 8)*32 - IF k[j] - dst[i+31:i] := a[n+31:n] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*64 - n := (j % 2)*64 - dst[i+63:i] := a[n+63:n] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - n := (j % 2)*64 - IF k[j] - dst[i+63:i] := a[n+63:n] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - n := (j % 2)*64 - IF k[j] - dst[i+63:i] := a[n+63:n] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". - -CASE imm8[0] OF -0: dst[255:0] := a[255:0] -1: dst[255:0] := a[511:256] -ESAC -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
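A sketch of the 256-bit extract above, splitting a 512-bit vector into its two halves via imm8[0], per the CASE in the pseudocode. The name _mm512_extractf32x8_ps is assumed; imm8 must be a compile-time constant.

    #include <immintrin.h>

    void split_halves(__m512 v, __m256 *lo, __m256 *hi) {
        *lo = _mm512_extractf32x8_ps(v, 0);  /* a[255:0]   */
        *hi = _mm512_extractf32x8_ps(v, 1);  /* a[511:256] */
    }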
- - - - - - - Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -CASE imm8[0] OF -0: tmp[255:0] := a[255:0] -1: tmp[255:0] := a[511:256] -ESAC -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -CASE imm8[0] OF -0: tmp[255:0] := a[255:0] -1: tmp[255:0] := a[511:256] -ESAC -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst". - -CASE imm8[1:0] OF -0: dst[127:0] := a[127:0] -1: dst[127:0] := a[255:128] -2: dst[127:0] := a[383:256] -3: dst[127:0] := a[511:384] -ESAC -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -CASE imm8[1:0] OF -0: tmp[127:0] := a[127:0] -1: tmp[127:0] := a[255:128] -2: tmp[127:0] := a[383:256] -3: tmp[127:0] := a[511:384] -ESAC -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -CASE imm8[1:0] OF -0: tmp[127:0] := a[127:0] -1: tmp[127:0] := a[255:128] -2: tmp[127:0] := a[383:256] -3: tmp[127:0] := a[511:384] -ESAC -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst". - -CASE imm8[0] OF -0: dst[255:0] := a[255:0] -1: dst[255:0] := a[511:256] -ESAC -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -CASE imm8[0] OF -0: tmp[255:0] := a[255:0] -1: tmp[255:0] := a[511:256] -ESAC -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -CASE imm8[0] OF -0: tmp[255:0] := a[255:0] -1: tmp[255:0] := a[511:256] -ESAC -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst". - -CASE imm8[1:0] OF -0: dst[127:0] := a[127:0] -1: dst[127:0] := a[255:128] -2: dst[127:0] := a[383:256] -3: dst[127:0] := a[511:384] -ESAC -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -CASE imm8[1:0] OF -0: tmp[127:0] := a[127:0] -1: tmp[127:0] := a[255:128] -2: tmp[127:0] := a[383:256] -3: tmp[127:0] := a[511:384] -ESAC -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -CASE imm8[1:0] OF -0: tmp[127:0] := a[127:0] -1: tmp[127:0] := a[255:128] -2: tmp[127:0] := a[383:256] -3: tmp[127:0] := a[511:384] -ESAC -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". - [fpclass_note] - FOR j := 0 to 7 - i := j*64 - k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) -ENDFOR -k[MAX:8] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
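A sketch of the unmasked fpclass test above, flagging NaN lanes. Both the name _mm512_fpclass_pd_mask and the imm8 category bits (assumed to follow the usual VFPCLASS encoding, where 0x01 selects QNaN and 0x80 selects SNaN; see [fpclass_note]) are assumptions here.

    #include <immintrin.h>

    /* k[j] is set iff lane j of v is a NaN (quiet or signaling). */
    __mmask8 nan_lanes(__m512d v) {
        return _mm512_fpclass_pd_mask(v, 0x01 | 0x80);
    }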
- - - - - - Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - [fpclass_note] - FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". - [fpclass_note] - FOR j := 0 to 15 - i := j*32 - k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) -ENDFOR -k[MAX:16] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - [fpclass_note] - FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0]) - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Test the lower double-precision (64-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k". - [fpclass_note] - k[0] := CheckFPClass_FP64(a[63:0], imm8[7:0]) -k[MAX:1] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Test the lower double-precision (64-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). - [fpclass_note] - IF k1[0] - k[0] := CheckFPClass_FP64(a[63:0], imm8[7:0]) -ELSE - k[0] := 0 -FI -k[MAX:1] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Test the lower single-precision (32-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k. - [fpclass_note] - k[0] := CheckFPClass_FP32(a[31:0], imm8[7:0]) -k[MAX:1] := 0 - - - AVX512DQ -
- - - - - Test the lower single-precision (32-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k". - [fpclass_note] - k[0] := CheckFPClass_FP32(a[31:0], imm8[7:0]) -k[MAX:1] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Test the lower single-precision (32-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). - [fpclass_note] - IF k1[0] - k[0] := CheckFPClass_FP32(a[31:0], imm8[7:0]) -ELSE - k[0] := 0 -FI -k[MAX:1] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Copy "a" to "dst", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". - -dst[511:0] := a[511:0] -CASE (imm8[0]) OF -0: dst[255:0] := b[255:0] -1: dst[511:256] := b[255:0] -ESAC -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
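A sketch of the inverse of the earlier extract: rebuilding a 512-bit vector from two 256-bit halves using the insert just described. The names _mm512_insertf32x8 and _mm512_castps256_ps512 are assumed; imm8 must be a compile-time constant.

    #include <immintrin.h>

    __m512 join_halves(__m256 lo, __m256 hi) {
        __m512 v = _mm512_castps256_ps512(lo); /* low half; upper bits undefined */
        return _mm512_insertf32x8(v, hi, 1);   /* write b into dst[511:256]      */
    }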
- - - - - - - - Copy "a" to "tmp", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[0]) OF -0: tmp[255:0] := b[255:0] -1: tmp[511:256] := b[255:0] -ESAC -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Copy "a" to "tmp", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[0]) OF -0: tmp[255:0] := b[255:0] -1: tmp[511:256] := b[255:0] -ESAC -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". - -dst[511:0] := a[511:0] -CASE imm8[1:0] OF -0: dst[127:0] := b[127:0] -1: dst[255:128] := b[127:0] -2: dst[383:256] := b[127:0] -3: dst[511:384] := b[127:0] -ESAC -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[1:0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -2: tmp[383:256] := b[127:0] -3: tmp[511:384] := b[127:0] -ESAC -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[1:0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -2: tmp[383:256] := b[127:0] -3: tmp[511:384] := b[127:0] -ESAC -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Copy "a" to "dst", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8". - -dst[511:0] := a[511:0] -CASE imm8[0] OF -0: dst[255:0] := b[255:0] -1: dst[511:256] := b[255:0] -ESAC -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Copy "a" to "tmp", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[0]) OF -0: tmp[255:0] := b[255:0] -1: tmp[511:256] := b[255:0] -ESAC -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Copy "a" to "tmp", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[0]) OF -0: tmp[255:0] := b[255:0] -1: tmp[511:256] := b[255:0] -ESAC -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Copy "a" to "dst", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8". - -dst[511:0] := a[511:0] -CASE imm8[1:0] OF -0: dst[127:0] := b[127:0] -1: dst[255:128] := b[127:0] -2: dst[383:256] := b[127:0] -3: dst[511:384] := b[127:0] -ESAC -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[1:0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -2: tmp[383:256] := b[127:0] -3: tmp[511:384] := b[127:0] -ESAC -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[1:0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -2: tmp[383:256] := b[127:0] -3: tmp[511:384] := b[127:0] -ESAC -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a". - -FOR j := 0 to 15 - i := j*32 - IF a[i+31] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := 0xFFFFFFFF - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
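A sketch combining the two 32-bit entries above: the first converts each lane's sign bit into a mask bit, the second expands a mask back into all-ones/all-zeros lanes, so composing them splats each lane's sign across the lane. The names _mm512_movepi32_mask and _mm512_movm_epi32 are assumed.

    #include <immintrin.h>

    __m512i sign_splat(__m512i v) {
        __mmask16 k = _mm512_movepi32_mask(v); /* k[j] = MSB of lane j       */
        return _mm512_movm_epi32(k);           /* lane j = k[j] ? ~0 : 0     */
    }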
- - - - Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k". - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := 0xFFFFFFFFFFFFFFFF - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a". - -FOR j := 0 to 7 - i := j*64 - IF a[i+63] - k[j] := 1 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
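A sketch of the unmasked RANGE operation just described, picking the larger magnitude of each pair with the sign bit cleared: per the control encoding above, imm8[1:0] = 11 selects absolute max and imm8[3:2] = 10 clears the sign bit, giving imm8 = 0b1011 = 0x0B. The name _mm512_range_pd is assumed; imm8 must be a compile-time constant.

    #include <immintrin.h>

    /* Per-lane max(|a|, |b|), always non-negative. */
    __m512d max_magnitude(__m512d a, __m512d b) {
        return _mm512_range_pd(a, b, 0x0B);
    }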
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -IF k[0] - dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -IF k[0] - dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -IF k[0] - dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -IF k[0] - dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src1[63:0] : src2[63:0] - 1: tmp[63:0] := (src1[63:0] <= src2[63:0]) ? src2[63:0] : src1[63:0] - 2: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src1[63:0] : src2[63:0] - 3: tmp[63:0] := (ABS(src1[63:0]) <= ABS(src2[63:0])) ? src2[63:0] : src1[63:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[63:0] := (src1[63] << 63) OR (tmp[62:0]) - 1: dst[63:0] := tmp[63:0] - 2: dst[63:0] := (0 << 63) OR (tmp[62:0]) - 3: dst[63:0] := (1 << 63) OR (tmp[62:0]) - ESAC - - RETURN dst -} -dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -IF k[0] - dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -IF k[0] - dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -IF k[0] - dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -IF k[0] - dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max. - imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note] - -DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) { - CASE opCtl[1:0] OF - 0: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src1[31:0] : src2[31:0] - 1: tmp[31:0] := (src1[31:0] <= src2[31:0]) ? src2[31:0] : src1[31:0] - 2: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src1[31:0] : src2[31:0] - 3: tmp[31:0] := (ABS(src1[31:0]) <= ABS(src2[31:0])) ? src2[31:0] : src1[31:0] - ESAC - - CASE signSelCtl[1:0] OF - 0: dst[31:0] := (src1[31] << 31) OR (tmp[30:0]) - 1: dst[31:0] := tmp[31:0] - 2: dst[31:0] := (0 << 31) OR (tmp[30:0]) - 3: dst[31:0] := (1 << 31) OR (tmp[30:0]) - ESAC - - RETURN dst -} -dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
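A sketch of the unmasked reduced-argument operation just described: with imm8[7:4] = 0 the ROUND in the pseudocode rounds to an integer, so the result is the fractional part of each element. Taking imm8[3:0] = 1 as round-down per the usual [round_imm_note] encoding (an assumption here, as is the name _mm512_reduce_pd) gives a - floor(a) in [0, 1).

    #include <immintrin.h>

    /* Per-lane fractional part: a - floor(a). */
    __m512d frac(__m512d a) {
        return _mm512_reduce_pd(a, 0x01);
    }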
- - - - - - Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
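The ReduceArgumentPD/PS pseudocode above is easier to check in scalar form. Below is a minimal scalar C model of it, assuming imm8[3:0] selects round-toward-negative-infinity (floor) and writing m for imm8[7:4]; the packed forms are exposed by AVX512DQ (the intrinsic names sat in the stripped XML attributes; _mm512_reduce_pd / _mm512_reduce_ps are assumed).

#include <math.h>
#include <stdio.h>

/* Scalar model of ReduceArgumentPD: round x to m fraction bits, then return
   the remainder x - rounded(x); an infinite remainder is flushed to 0.0. */
static double reduce_argument_pd(double x, int m)
{
    double scale = ldexp(1.0, m);            /* 2^m */
    double kept  = floor(x * scale) / scale; /* x rounded down to m fraction bits */
    double r     = x - kept;
    return isinf(r) ? 0.0 : r;
}

int main(void)
{
    /* m = 4: 1.7265625 = 1.6875 + 0.0390625, so the reduced argument is the tail. */
    printf("%.7f\n", reduce_argument_pd(1.7265625, 4)); /* prints 0.0390625 */
    return 0;
}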
- - - - - - - - Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -IF k[0] - dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - - Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -IF k[0] - dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -IF k[0] - dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -IF k[0] - dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note] - -DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) { - m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0]) - tmp[63:0] := src1[63:0] - tmp[63:0] - IF IsInf(tmp[63:0]) - tmp[63:0] := FP64(0.0) - FI - RETURN tmp[63:0] -} -dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -IF k[0] - dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - - Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -IF k[0] - dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -IF k[0] - dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - - Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -IF k[0] - dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note] - -DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) { - m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0]) - tmp[31:0] := src1[31:0] - tmp[31:0] - IF IsInf(tmp[31:0]) - tmp[31:0] := FP32(0.0) - FI - RETURN tmp[31:0] -} -dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512DQ -
immintrin.h
- Miscellaneous -
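In the scalar (SD/SS) forms above, only the lowest lane of "b" is reduced; the remaining lanes are copied from "a". A hedged usage sketch, assuming the usual AVX512DQ name _mm_reduce_sd and imm8 = 0x41, i.e. m = imm8[7:4] = 4 fraction bits with imm8[3:0] = 1 (round toward negative infinity):

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    __m128d a = _mm_set_pd(9.0, 9.0);       /* upper lane of the result comes from a */
    __m128d b = _mm_set_pd(0.0, 1.7265625); /* only the lower lane of b is reduced  */
    __m128d r = _mm_reduce_sd(a, b, 0x41);

    double out[2];
    _mm_storeu_pd(out, r);
    printf("lo=%.7f hi=%.1f\n", out[0], out[1]); /* lo=0.0390625 hi=9.0 */
    return 0;
}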
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
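The entries above round according to the current (or an explicitly supplied) rounding mode; the truncating variants further down always round toward zero. A short sketch of the difference, assuming the usual AVX512DQ names _mm512_cvtpd_epi64 and _mm512_cvttpd_epi64 (compile with -mavx512dq):

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    __m512d a = _mm512_set1_pd(2.75);
    __m512i r = _mm512_cvtpd_epi64(a);   /* round to nearest: 2.75 -> 3 */
    __m512i t = _mm512_cvttpd_epi64(a);  /* truncate:         2.75 -> 2 */

    long long ro[8], to[8];
    _mm512_storeu_si512(ro, r);
    _mm512_storeu_si512(to, t);
    printf("%lld %lld\n", ro[0], to[0]); /* 3 2 */
    return 0;
}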
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
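Every conversion in this section comes in a writemask and a zeromask flavor, and the two differ only in what the inactive lanes receive. A hedged sketch, assuming the names _mm512_mask_cvtpd_epu64 / _mm512_maskz_cvtpd_epu64:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    __m512d  a   = _mm512_set1_pd(7.0);
    __m512i  src = _mm512_set1_epi64(-1);
    __mmask8 k   = 0x0F;                             /* only lanes 0..3 are active */

    __m512i m = _mm512_mask_cvtpd_epu64(src, k, a);  /* inactive lanes keep src */
    __m512i z = _mm512_maskz_cvtpd_epu64(k, a);      /* inactive lanes become 0 */

    long long mo[8], zo[8];
    _mm512_storeu_si512(mo, m);
    _mm512_storeu_si512(zo, z);
    printf("lane0: %lld %lld   lane4: %lld %lld\n",
           mo[0], zo[0], mo[4], zo[4]);              /* lane0: 7 7   lane4: -1 0 */
    return 0;
}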
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := src[l+31:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := src[l+31:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
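Note the dst[MAX:256] := 0 in the int64-to-float32 entries above: eight 64-bit lanes narrow into a 256-bit float vector, and values beyond 2^24 may not survive the trip exactly. A sketch assuming the name _mm512_cvtepi64_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    /* float has a 24-bit significand, so the spacing near 1e9 is 64;
       1000000007 rounds to the nearest representable float, 1000000000. */
    __m512i a = _mm512_set1_epi64(1000000007LL);
    __m256  f = _mm512_cvtepi64_ps(a);  /* eight i64 lanes -> eight f32 lanes */

    float out[8];
    _mm256_storeu_ps(out, f);
    printf("%.1f\n", out[0]);           /* 1000000000.0 */
    return 0;
}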
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". [sae_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". [sae_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". [sae_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". [sae_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - - Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := Convert_UInt64_To_FP64(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
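[round_note] entries accept an explicit rounding mode instead of consulting MXCSR. A sketch of why that matters for the unsigned-64-bit-to-double case, assuming the name _mm512_cvt_roundepu64_pd: 2^64 - 1 is not representable as a double, so the supplied mode picks the neighbor.

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    __m512i a  = _mm512_set1_epi64((long long)0xFFFFFFFFFFFFFFFFULL); /* 2^64 - 1 */
    __m512d up = _mm512_cvt_roundepu64_pd(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    __m512d dn = _mm512_cvt_roundepu64_pd(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);

    double o[8];
    _mm512_storeu_pd(o, up);
    printf("%.1f\n", o[0]);  /* 18446744073709551616.0, i.e. 2^64        */
    _mm512_storeu_pd(o, dn);
    printf("%.1f\n", o[0]);  /* 18446744073709549568.0, i.e. 2^64 - 2048 */
    return 0;
}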
- - - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := src[l+31:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := src[l+31:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - - Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_UInt64_To_FP32(a[i+63:i]) - ELSE - dst[l+31:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512DQ - 
immintrin.h
- Convert -
- - - - - - - Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[127:0] := a[i+63:i] * b[i+63:i] - dst[i+63:i] := tmp[63:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Arithmetic -
- - - - - - Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[127:0] := a[i+63:i] * b[i+63:i] - dst[i+63:i] := tmp[63:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst". - -FOR j := 0 to 7 - i := j*64 - tmp[127:0] := a[i+63:i] * b[i+63:i] - dst[i+63:i] := tmp[63:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512DQ -
immintrin.h
- Arithmetic -
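The low-half multiply above is ordinary wrapping 64-bit multiplication per lane: the full 128-bit product is formed conceptually, but only its low 64 bits are kept. A sketch assuming the name _mm512_mullo_epi64:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    __m512i a = _mm512_set1_epi64(0x100000001LL);  /* 2^32 + 1 */
    /* (2^32 + 1)^2 = 2^64 + 2^33 + 1; the 2^64 term falls out of the low half. */
    __m512i p = _mm512_mullo_epi64(a, a);

    long long o[8];
    _mm512_storeu_si512(o, p);
    printf("%#llx\n", o[0]);  /* 0x200000001 */
    return 0;
}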
- - - - - Add 8-bit masks in "a" and "b", and store the result in "k". - -k[7:0] := a[7:0] + b[7:0] -k[MAX:8] := 0 - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Add 16-bit masks in "a" and "b", and store the result in "k". - -k[15:0] := a[15:0] + b[15:0] -k[MAX:16] := 0 - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Compute the bitwise AND of 8-bit masks "a" and "b", and store the result in "k". - -k[7:0] := a[7:0] AND b[7:0] -k[MAX:8] := 0 - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Compute the bitwise NOT of 8-bit mask "a" and then AND with "b", and store the result in "k". - -k[7:0] := (NOT a[7:0]) AND b[7:0] -k[MAX:8] := 0 - - - AVX512DQ - 
immintrin.h
- Mask -
- - - - Compute the bitwise NOT of 8-bit mask "a", and store the result in "k". - -k[7:0] := NOT a[7:0] -k[MAX:8] := 0 - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 8-bit masks "a" and "b", and store the result in "k". - -k[7:0] := a[7:0] OR b[7:0] -k[MAX:8] := 0 - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Compute the bitwise XNOR of 8-bit masks "a" and "b", and store the result in "k". - -k[7:0] := NOT (a[7:0] XOR b[7:0]) -k[MAX:8] := 0 - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Compute the bitwise XOR of 8-bit masks "a" and "b", and store the result in "k". - -k[7:0] := a[7:0] XOR b[7:0] -k[MAX:8] := 0 - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Shift the bits of 8-bit mask "a" left by "count" while shifting in zeros, and store the least significant 8 bits of the result in "k". - -k[MAX:0] := 0 -IF count[7:0] <= 7 - k[7:0] := a[7:0] << count[7:0] -FI - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Shift the bits of 8-bit mask "a" right by "count" while shifting in zeros, and store the least significant 8 bits of the result in "k". - -k[MAX:0] := 0 -IF count[7:0] <= 7 - k[7:0] := a[7:0] >> count[7:0] -FI - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - - Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". - -tmp[7:0] := a[7:0] OR b[7:0] -IF tmp[7:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI -IF tmp[7:0] == 0xFF - MEM[all_ones+7:all_ones] := 1 -ELSE - MEM[all_ones+7:all_ones] := 0 -FI - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[7:0] := a[7:0] OR b[7:0] -IF tmp[7:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512DQ - 
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". - -tmp[7:0] := a[7:0] OR b[7:0] -IF tmp[7:0] == 0xFF - dst := 1 -ELSE - dst := 0 -FI - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - - Compute the bitwise AND of 8-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b"; if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". - -tmp1[7:0] := a[7:0] AND b[7:0] -IF tmp1[7:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI -tmp2[7:0] := (NOT a[7:0]) AND b[7:0] -IF tmp2[7:0] == 0x0 - MEM[and_not+7:and_not] := 1 -ELSE - MEM[and_not+7:and_not] := 0 -FI - - - AVX512DQ - 
immintrin.h
- Mask -
- - - - - Compute the bitwise AND of 8-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[7:0] := a[7:0] AND b[7:0] -IF tmp[7:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Compute the bitwise NOT of 8-bit mask "a" and then AND with "b"; if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[7:0] := (NOT a[7:0]) AND b[7:0] -IF tmp[7:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512DQ - 
immintrin.h
- Mask -
- - - - - - Compute the bitwise AND of 16-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b"; if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not". - -tmp1[15:0] := a[15:0] AND b[15:0] -IF tmp1[15:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI -tmp2[15:0] := (NOT a[15:0]) AND b[15:0] -IF tmp2[15:0] == 0x0 - MEM[and_not+7:and_not] := 1 -ELSE - MEM[and_not+7:and_not] := 0 -FI - - - AVX512DQ - 
immintrin.h
- Mask -
- - - - - Compute the bitwise AND of 16-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[15:0] := a[15:0] AND b[15:0] -IF tmp[15:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512DQ -
immintrin.h
- Mask -
- - - - - Compute the bitwise NOT of 16-bit mask "a" and then AND with "b"; if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". - -tmp[15:0] := (NOT a[15:0]) AND b[15:0] -IF tmp[15:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512DQ - 
immintrin.h
- Mask -
- - - - Convert 8-bit mask "a" into an integer value, and store the result in "dst". - -dst := ZeroExtend32(a[7:0]) - - - AVX512DQ -
immintrin.h
- Mask -
- - - - Convert integer value "a" into an 8-bit mask, and store the result in "k". - -k := a[7:0] - - - AVX512DQ -
immintrin.h
- Mask -
- - - - Load 8-bit mask from memory into "k". - -k[7:0] := MEM[mem_addr+7:mem_addr] - - - AVX512DQ -
immintrin.h
- Load -
- - - - - Store 8-bit mask from "a" into memory. - -MEM[mem_addr+7:mem_addr] := a[7:0] - - - AVX512DQ -
immintrin.h
- Store -
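The mask operations above are plain bitwise logic on __mmask8 values, plus combined test operations that report all-zeros/all-ones in one step. A hedged sketch, assuming the usual _k*_mask8 spellings (shift counts must be compile-time constants):

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    __mmask8 a = 0xF0, b = 0xCC;

    __mmask8 k_and  = _kand_mask8(a, b);      /* 0xC0 */
    __mmask8 k_or   = _kor_mask8(a, b);       /* 0xFC */
    __mmask8 k_xnor = _kxnor_mask8(a, b);     /* NOT (a XOR b) = 0xC3 */
    __mmask8 k_shl  = _kshiftli_mask8(a, 2);  /* 0xC0: bits shifted past bit 7 drop */

    printf("%#x %#x %#x %#x\n",
           _cvtmask8_u32(k_and), _cvtmask8_u32(k_or),
           _cvtmask8_u32(k_xnor), _cvtmask8_u32(k_shl));

    /* kortest: OR the two masks, report all-zeros in the return value and
       all-ones through the pointer argument. */
    unsigned char all_ones;
    unsigned char all_zeros = _kortest_mask8_u8(a, b, &all_ones);
    printf("zeros=%u ones=%u\n", all_zeros, all_ones);  /* zeros=0 ones=0 */
    return 0;
}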
- - - - - - Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ACOS(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - - - Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ACOS(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ACOS(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - - - Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ACOS(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
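Unlike the AVX512DQ material above, these trigonometry entries describe vector math-library routines (SVML-style sequences), not single instructions, so they need a compiler that supplies SVML (e.g. ICX or MSVC; GCC/Clang require a separate vector-math library). A sketch assuming the conventional name _mm512_acos_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    __m512d x = _mm512_set1_pd(0.5);
    __m512d y = _mm512_acos_pd(x);  /* acos(0.5) = pi/3 in every lane */

    double out[8];
    _mm512_storeu_pd(out, y);
    printf("%.6f\n", out[0]);       /* 1.047198 */
    return 0;
}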
- - - - Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ACOSH(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - - - Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ACOSH(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ACOSH(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - - - Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ACOSH(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ASIN(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - - - Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ASIN(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ASIN(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - - - Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ASIN(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ASINH(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - - - Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ASINH(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ASINH(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
- - - - - - Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ASINH(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F - 
immintrin.h
- Trigonometry -
Inverse tangent of a quotient (Trigonometry): compute the inverse tangent of packed floating-point elements in "a" divided by the corresponding packed elements in "b", and store the results, expressed in radians, in "dst".
    dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i])
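The point of a two-argument ATAN2 is quadrant recovery, which ATAN(a / b) cannot provide. The per-lane operation matches C's scalar atan2(), so a scalar sketch shows the behavior:

    #include <math.h>
    #include <stdio.h>

    /* atan2 resolves the quadrant from the signs of both inputs. */
    int main(void) {
        printf("%f\n", atan2( 1.0,  1.0));   /*  pi/4  */
        printf("%f\n", atan2( 1.0, -1.0));   /*  3pi/4 */
        printf("%f\n", atan2(-1.0, -1.0));   /* -3pi/4 */
        printf("%f\n", atan2(-1.0,  1.0));   /* -pi/4  */
        return 0;
    }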
Inverse tangent (Trigonometry): compute the inverse tangent of packed floating-point elements in "a", and store the results, expressed in radians, in "dst".
    dst[i+63:i] := ATAN(a[i+63:i])

Inverse hyperbolic tangent (Trigonometry): compute the inverse hyperbolic tangent of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := ATANH(a[i+63:i])
Cosine (Trigonometry): compute the cosine of packed floating-point elements in "a" expressed in radians, and store the results in "dst".
    dst[i+63:i] := COS(a[i+63:i])

Cosine in degrees (Trigonometry): compute the cosine of packed floating-point elements in "a" expressed in degrees, and store the results in "dst".
    dst[i+63:i] := COSD(a[i+63:i])
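The degree-argument entries (COSD, and SIND/TAND below) differ from the radian ones only by input scaling: COSD(x) == COS(x * pi / 180). A scalar model of the per-lane operation, where "cosd" is a local helper and not a C library function:

    #include <math.h>
    #include <stdio.h>

    static const double PI = 3.14159265358979323846;

    /* Model of the per-lane COSD operation: degrees in, cosine out. */
    static double cosd(double deg) { return cos(deg * PI / 180.0); }

    int main(void) {
        printf("cosd(60)  = %f\n", cosd(60.0));   /*  0.5 */
        printf("cosd(180) = %f\n", cosd(180.0));  /* -1.0 */
        return 0;
    }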
Hyperbolic cosine (Trigonometry): compute the hyperbolic cosine of packed floating-point elements in "a" expressed in radians, and store the results in "dst".
    dst[i+63:i] := COSH(a[i+63:i])

Sine (Trigonometry): compute the sine of packed floating-point elements in "a" expressed in radians, and store the results in "dst".
    dst[i+63:i] := SIN(a[i+63:i])

Hyperbolic sine (Trigonometry): compute the hyperbolic sine of packed floating-point elements in "a" expressed in radians, and store the results in "dst".
    dst[i+63:i] := SINH(a[i+63:i])

Sine in degrees (Trigonometry): compute the sine of packed floating-point elements in "a" expressed in degrees, and store the results in "dst".
    dst[i+63:i] := SIND(a[i+63:i])

Tangent (Trigonometry): compute the tangent of packed floating-point elements in "a" expressed in radians, and store the results in "dst".
    dst[i+63:i] := TAN(a[i+63:i])

Tangent in degrees (Trigonometry): compute the tangent of packed floating-point elements in "a" expressed in degrees, and store the results in "dst".
    dst[i+63:i] := TAND(a[i+63:i])

Hyperbolic tangent (Trigonometry): compute the hyperbolic tangent of packed floating-point elements in "a" expressed in radians, and store the results in "dst".
    dst[i+63:i] := TANH(a[i+63:i])
Sine and cosine together (Trigonometry; deviates from the shared shape): compute the sine and cosine of packed floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr".
    FOR j := 0 to 7
        i := j*64
        dst[i+63:i] := SIN(a[i+63:i])
        MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i])
    ENDFOR
    dst[MAX:512] := 0
    cos_res[MAX:512] := 0
The single-precision form applies the same operation to 16 32-bit elements. Writemasked forms write both destinations under mask "k", copying elements from "sin_src" or "cos_src" respectively when the corresponding mask bit is not set.
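A usage sketch for the combined sine/cosine entry. The name _mm512_sincos_pd and the prototype used below are taken from the Intel Intrinsics Guide's SVML listing and are an assumption to verify against your toolchain: GCC and Clang do not define the SVML functions by default, so this only builds with a compiler that ships them (ICX, MSVC).

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512d a = _mm512_set1_pd(0.5);
        __m512d c;                          /* receives the cosines   */
        __m512d s = _mm512_sincos_pd(&c, a); /* returns the sines     */

        double sv[8], cv[8];
        _mm512_storeu_pd(sv, s);
        _mm512_storeu_pd(cv, c);
        printf("sin(0.5)=%f cos(0.5)=%f\n", sv[0], cv[0]);
        return 0;
    }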
Cube root (Elementary Math Functions): compute the cube root of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := CubeRoot(a[i+63:i])
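A dedicated cube root is not redundant with POW: CubeRoot is defined for negative inputs, while pow(x, 1.0/3.0) is a domain error for x < 0. C's cbrt() models the per-lane math:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        printf("%f\n", cbrt(-8.0));            /* -2.0 */
        printf("%f\n", pow(-8.0, 1.0 / 3.0));  /* nan  */
        return 0;
    }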
Base-10 exponential (Elementary Math Functions): compute 10 raised to the power of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := POW(10.0, a[i+63:i])

Base-2 exponential (Elementary Math Functions): compute 2 raised to the power of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := POW(2.0, a[i+63:i])

Natural exponential (Elementary Math Functions): compute "e" raised to the power of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := POW(e, a[i+63:i])

Natural exponential minus one (Elementary Math Functions): compute "e" raised to the power of packed floating-point elements in "a", subtract one from each element, and store the results in "dst".
    dst[i+63:i] := POW(e, a[i+63:i]) - 1.0
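The "minus one" entry exists for accuracy, not convenience: for tiny x, exp(x) rounds to 1.0 and the explicit subtraction loses every significant bit, while a fused expm1 keeps full precision. The same contrast applies per lane; C's scalar functions show it:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        double x = 1e-18;
        printf("exp(x)-1 = %.20g\n", exp(x) - 1.0); /* 0       */
        printf("expm1(x) = %.20g\n", expm1(x));     /* ~1e-18  */
        return 0;
    }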
Hypotenuse (Elementary Math Functions): compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides stored as packed floating-point elements in "a" and "b", and store the results in "dst".
    dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0))
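Note that the pseudocode's literal SQRT(a*a + b*b) overflows when the squares exceed the double range even though the true hypotenuse is finite; C's hypot() (and any careful vector implementation) rescales internally to avoid that:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        double a = 1e200, b = 1e200;
        printf("naive: %g\n", sqrt(a * a + b * b)); /* inf          */
        printf("hypot: %g\n", hypot(a, b));         /* ~1.41421e200 */
        return 0;
    }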
Inverse square root (Elementary Math Functions): compute the inverse square root of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := InvSQRT(a[i+63:i])
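InvSQRT here is the full-precision 1/sqrt(x). Plain AVX512F also offers _mm512_rsqrt14_pd, an approximation with relative error bounded by 2^-14, which can be cheaper when a Newton refinement step or lower accuracy is acceptable. A sketch comparing the two:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512d a      = _mm512_set1_pd(2.0);
        __m512d approx = _mm512_rsqrt14_pd(a);          /* ~2^-14 error */
        __m512d exact  = _mm512_div_pd(_mm512_set1_pd(1.0),
                                       _mm512_sqrt_pd(a));
        double x[8], y[8];
        _mm512_storeu_pd(x, approx);
        _mm512_storeu_pd(y, exact);
        printf("approx %.10f vs exact %.10f\n", x[0], y[0]);
        return 0;
    }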
Base-10 logarithm (Elementary Math Functions): compute the base-10 logarithm of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0)

Natural logarithm of one plus (Elementary Math Functions): compute the natural logarithm of one plus packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := LOG(1.0 + a[i+63:i])

Base-2 logarithm (Elementary Math Functions; double-precision and writemasked double-precision forms only): compute the base-2 logarithm of packed double-precision floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0)

Natural logarithm (Elementary Math Functions): compute the natural logarithm of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := LOG(a[i+63:i])

Exponent extraction (Elementary Math Functions): convert the exponent of each packed floating-point element in "a" to a floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates floor(log2(x)) for each element.
    dst[i+63:i] := ConvertExpFP64(a[i+63:i])
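The exponent-extraction entry has a close relative in plain AVX512F: _mm512_getexp_pd, which returns the exponent of |a| as a double and agrees with floor(log2(x)) for positive finite inputs. A sketch (the correspondence for negative, zero, and non-finite inputs should be checked separately):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        /* _mm512_set_pd lists lanes high to low, so out[0] holds 7.0. */
        __m512d a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0,
                                  10.0, 0.5, 100.0, 7.0);
        __m512d e = _mm512_getexp_pd(a);
        double out[8];
        _mm512_storeu_pd(out, e);
        for (int j = 0; j < 8; j++)
            printf("%g ", out[j]);   /* 2 6 -1 3 2 1 1 0 */
        printf("\n");
        return 0;
    }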
Power (Elementary Math Functions): raise packed floating-point elements in "a" to the power given by the corresponding packed elements in "b", and store the results in "dst".
    dst[i+63:i] := POW(a[i+63:i], b[i+63:i])

Reciprocal (Elementary Math Functions): compute the reciprocal of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := (1.0 / a[i+63:i])
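The reciprocal entry is the exact 1.0/a per lane, which plain AVX512F can express as a division; _mm512_rcp14_pd gives a 2^-14-accurate approximation when speed matters more than the last bits. Both intrinsics below are standard AVX512F:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512d a     = _mm512_set1_pd(3.0);
        __m512d exact = _mm512_div_pd(_mm512_set1_pd(1.0), a);
        __m512d fast  = _mm512_rcp14_pd(a);   /* approximate 1/a */
        double e[8], f[8];
        _mm512_storeu_pd(e, exact);
        _mm512_storeu_pd(f, fast);
        printf("exact %.17f, approx %.17f\n", e[0], f[0]);
        return 0;
    }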
Normal CDF (Probability/Statistics): compute the cumulative distribution function of packed floating-point elements in "a" using the normal distribution, and store the results in "dst".
    dst[i+63:i] := CDFNormal(a[i+63:i])
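CDFNormal relates directly to the error-function entries further down: CDFNormal(x) = 0.5 * erfc(-x / sqrt(2)). A scalar model of the per-lane operation, using C99's erfc:

    #include <math.h>
    #include <stdio.h>

    /* Standard normal CDF via the complementary error function. */
    static double cdfnormal(double x) { return 0.5 * erfc(-x / sqrt(2.0)); }

    int main(void) {
        printf("%f\n", cdfnormal(0.0));   /* 0.5    */
        printf("%f\n", cdfnormal(1.96));  /* ~0.975 */
        return 0;
    }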
Inverse normal CDF (Probability/Statistics): compute the inverse cumulative distribution function of packed floating-point elements in "a" using the normal distribution, and store the results in "dst".
    dst[i+63:i] := InverseCDFNormal(a[i+63:i])
Error function (Probability/Statistics): compute the error function of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := ERF(a[i+63:i])

Complementary error function (Probability/Statistics): compute the complementary error function of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := 1.0 - ERF(a[i+63:i])
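The pseudocode writes the complementary error function as 1.0 - ERF(a), but a dedicated erfc keeps precision in the tail where erf(x) rounds to exactly 1.0, which is why it is a separate entry. C's scalar pair shows the gap:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        double x = 10.0;
        printf("1 - erf(x) = %g\n", 1.0 - erf(x)); /* 0         */
        printf("erfc(x)    = %g\n", erfc(x));      /* ~2.09e-45 */
        return 0;
    }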
Inverse error function (Probability/Statistics): compute the inverse error function of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := 1.0 / ERF(a[i+63:i])

Inverse complementary error function (Probability/Statistics): compute the inverse complementary error function of packed floating-point elements in "a", and store the results in "dst".
    dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i]))

In these two operations the "1.0 /" notation denotes the functional inverse named in the description (ERF^-1 and ERFC^-1), not an arithmetic reciprocal.
Ceiling (Special Math Functions): round the packed floating-point elements in "a" up to an integer value, and store the results as packed floating-point elements in "dst".
    dst[i+63:i] := CEIL(a[i+63:i])

Floor (Special Math Functions): round the packed floating-point elements in "a" down to an integer value, and store the results as packed floating-point elements in "dst".
    dst[i+63:i] := FLOOR(a[i+63:i])

Round to nearby integer (Special Math Functions): round each packed floating-point element in "a" to the nearest integer value, and store the results as packed floating-point elements in "dst".
    dst[i+63:i] := NearbyInt(a[i+63:i])

Round to nearest even (Special Math Functions): round the packed floating-point elements in "a" to the nearest even integer value, and store the results in "dst".
    dst[i+63:i] := RoundToNearestEven(a[i+63:i])

Round to nearest (Special Math Functions; double-precision and writemasked double-precision forms only): round the packed double-precision floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst".
    dst[i+63:i] := ROUND(a[i+63:i])
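These rounding entries differ the same way their C99 counterparts do: CEIL and FLOOR are directed, TRUNCATE (next entry) drops the fraction, ROUND breaks ties away from zero, and NearbyInt honors the current rounding mode, which is round-to-nearest-even by default. The halfway cases make the distinction visible:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        double v[] = { 2.5, -2.5, 3.5 };
        for (int j = 0; j < 3; j++)
            printf("x=% .1f  round=% .1f  nearbyint=% .1f  trunc=% .1f\n",
                   v[j], round(v[j]), nearbyint(v[j]), trunc(v[j]));
        /* 2.5 -> round 3, nearbyint 2; -2.5 -> round -3, nearbyint -2 */
        return 0;
    }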
- - - - Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". - FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := TRUNCATE(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - - Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := TRUNCATE(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". - FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := TRUNCATE(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - - Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := TRUNCATE(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Special Math Functions -
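Truncation is the same pattern with a different rounding immediate; a sketch of the double-precision case under the same assumption:

#include <immintrin.h>

/* Truncate each double lane toward zero (drop the fraction). */
static __m512d trunc_toward_zero_pd(__m512d a) {
    return _mm512_roundscale_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}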
- - - - - Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 15 - i := 32*j - IF b[i+31:i] == 0 - #DE - FI - dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - IF b[i+31:i] == 0 - #DE - FI - dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
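There is no packed integer-divide instruction in AVX512F; these division entries are SVML library functions, so on non-Intel toolchains a scalar loop is the usual stand-in. A sketch of the masked 32-bit case (the function name is illustrative, not an Intel API):

#include <stdint.h>

/* Scalar model of the masked epi32 division above. Lanes with a clear
   mask bit are copied from src. As in the pseudocode, b[j] == 0 is not
   handled: the vector form raises #DE, and C leaves it undefined. */
static void mask_div_epi32_model(int32_t dst[16], const int32_t src[16],
                                 uint16_t k, const int32_t a[16],
                                 const int32_t b[16]) {
    for (int j = 0; j < 16; j++)
        dst[j] = ((k >> j) & 1) ? a[j] / b[j] : src[j];
}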
- - - - - Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 63 - i := 8*j - IF b[i+7:i] == 0 - #DE - FI - dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 31 - i := 16*j - IF b[i+15:i] == 0 - #DE - FI - dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 7 - i := 64*j - IF b[i+63:i] == 0 - #DE - FI - dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". - FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 8-bit integers in "dst". - FOR j := 0 to 63
 i := 8*j
 dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i])
ENDFOR
dst[MAX:512] := 0
 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 16-bit integers in "dst". - FOR j := 0 to 31
 i := 16*j
 dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i])
ENDFOR
dst[MAX:512] := 0
 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 64-bit integers in "dst". - FOR j := 0 to 7
 i := 64*j
 dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i])
ENDFOR
dst[MAX:512] := 0
 - - AVX512F -
immintrin.h
- Arithmetic -
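These remainder entries pair with the truncating divisions above: for signed operands the remainder takes the sign of the dividend, which is exactly what C's % operator yields. A scalar sketch of the 64-bit case (name illustrative):

#include <stdint.h>

/* Scalar model of the packed 64-bit remainder: C's % is the remainder of
   truncated division, so a[j] == (a[j] / b[j]) * b[j] + dst[j] holds. */
static void rem_epi64_model(int64_t dst[8], const int64_t a[8],
                            const int64_t b[8]) {
    for (int j = 0; j < 8; j++)
        dst[j] = a[j] % b[j];   /* divisors assumed nonzero, as above */
}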
- - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 15 - i := 32*j - IF b[i+31:i] == 0 - #DE - FI - dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - IF b[i+31:i] == 0 - #DE - FI - dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 63 - i := 8*j - IF b[i+7:i] == 0 - #DE - FI - dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 31 - i := 16*j - IF b[i+15:i] == 0 - #DE - FI - dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 7 - i := 64*j - IF b[i+63:i] == 0 - #DE - FI - dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". - FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 8-bit integers in "dst". - FOR j := 0 to 63
 i := 8*j
 dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i])
ENDFOR
dst[MAX:512] := 0
 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 16-bit integers in "dst". - FOR j := 0 to 31
 i := 16*j
 dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i])
ENDFOR
dst[MAX:512] := 0
 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 64-bit integers in "dst". - FOR j := 0 to 7
 i := 64*j
 dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i])
ENDFOR
dst[MAX:512] := 0
 - - AVX512F -
immintrin.h
- Arithmetic -
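The unsigned variants obey the same identity; a quick plain-C self-check relating a division entry to its remainder entry (divisors must be nonzero, as the #DE notes above require):

#include <assert.h>
#include <stdint.h>

/* For every lane, quotient * divisor + remainder reconstructs the dividend. */
static void check_divrem_epu32(const uint32_t a[16], const uint32_t b[16]) {
    for (int j = 0; j < 16; j++) {
        uint32_t q = a[j] / b[j], r = a[j] % b[j];
        assert(a[j] == q * b[j] + r);
    }
}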
- - - - Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
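The base-2 logarithm is likewise an SVML entry. Without SVML, libm's log2f in a loop models the masked semantics; a sketch with an illustrative name:

#include <math.h>
#include <stdint.h>

/* Scalar model of the masked log2: pass-through lanes keep src. */
static void mask_log2_ps_model(float dst[16], const float src[16],
                               uint16_t k, const float a[16]) {
    for (int j = 0; j < 16; j++)
        dst[j] = ((k >> j) & 1) ? log2f(a[j]) : src[j];
}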
- - - - - - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] + b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] + b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] + b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] + b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] + b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] + b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] + b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] + b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
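From here the AVX512VL entries come in writemask/zeromask pairs: the mask form takes a src vector for the inactive lanes, while the maskz form implicitly substitutes zero. These map one-to-one onto intrinsics; a minimal sketch with the 256-bit double-precision add:

#include <immintrin.h>

/* Writemask: lanes with a clear bit in k are copied from src. */
static __m256d add_blend(__m256d src, __mmask8 k, __m256d a, __m256d b) {
    return _mm256_mask_add_pd(src, k, a, b);
}

/* Zeromask: lanes with a clear bit in k become 0.0. */
static __m256d add_zero(__mmask8 k, __m256d a, __m256d b) {
    return _mm256_maskz_add_pd(k, a, b);
}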
- - - - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 64*j - IF k[j] - dst[i+63:i] := a[i+63:i] / b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 64*j - IF k[j] - dst[i+63:i] := a[i+63:i] / b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 64*j - IF k[j] - dst[i+63:i] := a[i+63:i] / b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 64*j - IF k[j] - dst[i+63:i] := a[i+63:i] / b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - IF k[j] - dst[i+31:i] := a[i+31:i] / b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - IF k[j] - dst[i+31:i] := a[i+31:i] / b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - IF k[j] - dst[i+31:i] := a[i+31:i] / b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - IF k[j] - dst[i+31:i] := a[i+31:i] / b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
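A typical use of the masked division is guarding against zero divisors: derive the mask from a compare and divide only the safe lanes. A sketch with the 128-bit single-precision forms (standard AVX512VL intrinsics; the ordered compare also routes NaN divisors to the fallback):

#include <immintrin.h>

/* Divide a by b lane-wise; lanes where b == 0.0f keep the fallback value. */
static __m128 safe_div_ps(__m128 fallback, __m128 a, __m128 b) {
    __mmask8 nonzero = _mm_cmp_ps_mask(b, _mm_setzero_ps(), _CMP_NEQ_OQ);
    return _mm_mask_div_ps(fallback, nonzero, a, b);
}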
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
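The FMA entries add a third masking flavour: besides maskz there are two writemask forms, differing only in which input survives in the inactive lanes ("a" for mask, "c" for mask3). A sketch with the 256-bit double-precision fused multiply-add:

#include <immintrin.h>

/* dst = a*b + c where k is set; inactive lanes keep a. */
static __m256d fmadd_keep_a(__m256d a, __mmask8 k, __m256d b, __m256d c) {
    return _mm256_mask_fmadd_pd(a, k, b, c);
}

/* dst = a*b + c where k is set; inactive lanes keep c. */
static __m256d fmadd_keep_c(__m256d a, __m256d b, __m256d c, __mmask8 k) {
    return _mm256_mask3_fmadd_pd(a, b, c, k);
}

/* dst = a*b + c where k is set; inactive lanes are zeroed. */
static __m256d fmadd_zero(__mmask8 k, __m256d a, __m256d b, __m256d c) {
    return _mm256_maskz_fmadd_pd(k, a, b, c);
}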
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - FI - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - FI - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - FI - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - FI - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - FI - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - FI - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - FI - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - FI - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
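fmaddsub interleaves the two FMA signs by lane parity: even lanes subtract "c", odd lanes add it (fmsubadd below mirrors the pattern). A scalar model of a 4-lane single-precision vector makes this explicit:

/* Scalar model of fmaddsub on four float lanes:
   even j -> a*b - c, odd j -> a*b + c, matching the pseudocode above. */
static void fmaddsub_ps_model(float dst[4], const float a[4],
                              const float b[4], const float c[4]) {
    for (int j = 0; j < 4; j++)
        dst[j] = (j % 2 == 0) ? a[j] * b[j] - c[j]
                              : a[j] * b[j] + c[j];
}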
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
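fmsub computes a*b - c, so combined with a zeroing mask it yields residuals on selected lanes and exact zeros elsewhere; a one-line sketch with the 128-bit form:

#include <immintrin.h>

/* Residual a*b - c on lanes selected by k; other lanes read as 0.0f. */
static __m128 residual_ps(__mmask8 k, __m128 a, __m128 b, __m128 c) {
    return _mm_maskz_fmsub_ps(k, a, b, c);
}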
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - FI - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - FI - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - FI - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - FI - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - FI - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - FI - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - FI - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - FI - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
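fnmadd computes c - (a*b), the textbook shape of a Newton-Raphson correction term. A sketch refining a reciprocal estimate with one unmasked step (plain FMA intrinsics; x is assumed to be a rough estimate of 1/d):

#include <immintrin.h>

/* One Newton-Raphson step for 1/d: x' = x * (2 - d*x).
   _mm256_fnmadd_ps(d, x, two) computes -(d*x) + 2 in a single FMA. */
static __m256 recip_refine(__m256 d, __m256 x) {
    const __m256 two = _mm256_set1_ps(2.0f);
    return _mm256_mul_ps(x, _mm256_fnmadd_ps(d, x, two));
}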
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512F - AVX512VL -
immintrin.h
-Arithmetic
-
-Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note]
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): pd 256-bit (j := 0 to 3, 64-bit elements), pd 128-bit (j := 0 to 1), ps 256-bit (j := 0 to 7, 32-bit elements), ps 128-bit (j := 0 to 3); each in a writemask form and a zeromask form ("elements are zeroed out when the corresponding mask bit is not set").
-
-Operation, shown for the pd 256-bit writemask form (zeromask forms set the ELSE arm to 0; 128-bit forms end with dst[MAX:128] := 0; ps forms use i := j*32 and dst[i+31:i]):
-FOR j := 0 to 3
-    i := j*64
-    IF k[j]
-        dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
-    ELSE
-        dst[i+63:i] := src[i+63:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
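The writemask and zeromask forms differ only in what happens to unselected lanes: the former merges them from "src", the latter zeroes them. A minimal Rust sketch of the contrast, assuming the core::arch::x86_64 names mirror the C API (the intrinsic names themselves are not preserved in this hunk; _mm256_mask_max_pd and _mm256_maskz_max_pd are the conventional ones):

    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    unsafe fn demo() {
        let src = _mm256_set1_pd(-1.0);
        let a = _mm256_setr_pd(1.0, 5.0, 3.0, 7.0);
        let b = _mm256_setr_pd(4.0, 2.0, 8.0, 6.0);
        let k: __mmask8 = 0b0101; // select lanes 0 and 2
        let merged = _mm256_mask_max_pd(src, k, a, b); // writemask: other lanes copy src
        let zeroed = _mm256_maskz_max_pd(k, a, b);     // zeromask: other lanes become 0.0
        let (mut m, mut z) = ([0.0f64; 4], [0.0f64; 4]);
        _mm256_storeu_pd(m.as_mut_ptr(), merged);
        _mm256_storeu_pd(z.as_mut_ptr(), zeroed);
        assert_eq!(m, [4.0, -1.0, 8.0, -1.0]);
        assert_eq!(z, [4.0, 0.0, 8.0, 0.0]);
    }

    fn main() {
        // run only where the features are actually present
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
            unsafe { demo() }
        }
    }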
-Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note]
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): pd 256-bit (j := 0 to 3, 64-bit elements), pd 128-bit (j := 0 to 1), ps 256-bit (j := 0 to 7, 32-bit elements), ps 128-bit (j := 0 to 3); each in a writemask form and a zeromask form ("elements are zeroed out when the corresponding mask bit is not set").
-
-Operation, shown for the pd 256-bit writemask form (zeromask forms set the ELSE arm to 0; 128-bit forms end with dst[MAX:128] := 0; ps forms use i := j*32 and dst[i+31:i]):
-FOR j := 0 to 3
-    i := j*64
-    IF k[j]
-        dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
-    ELSE
-        dst[i+63:i] := src[i+63:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
-Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): pd 256-bit (j := 0 to 3, 64-bit elements), pd 128-bit (j := 0 to 1), ps 256-bit (j := 0 to 7, 32-bit elements), ps 128-bit (j := 0 to 3); each in a writemask form and a zeromask form ("elements are zeroed out when the corresponding mask bit is not set").
-
-Operation, shown for the pd 256-bit writemask form (zeromask forms set the ELSE arm to 0; 128-bit forms end with dst[MAX:128] := 0; ps forms use i := j*32 and dst[i+31:i]):
-FOR j := 0 to 3
-    i := j*64
-    IF k[j]
-        dst[i+63:i] := a[i+63:i] * b[i+63:i]
-    ELSE
-        dst[i+63:i] := src[i+63:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
-Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): 32-bit elements at 256-bit (j := 0 to 7) and 128-bit (j := 0 to 3), in writemask and zeromask forms; 64-bit elements at 256-bit (j := 0 to 3) and 128-bit (j := 0 to 1), in unmasked, writemask, and zeromask forms.
-
-Operation, shown for the 32-bit 256-bit writemask form (zeromask forms set the ELSE arm to 0; unmasked forms have no IF/ELSE and assign every element; 128-bit forms end with dst[MAX:128] := 0; 64-bit forms use i := j*64 and dst[i+63:i]):
-FOR j := 0 to 7
-    i := j*32
-    IF k[j]
-        dst[i+31:i] := ABS(a[i+31:i])
-    ELSE
-        dst[i+31:i] := src[i+31:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
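AVX2 stops at 32-bit integer absolute value; the 64-bit forms above are new with AVX-512. A sketch of the unmasked 256-bit form, assuming the conventional stdarch name _mm256_abs_epi64 (not preserved in this hunk); note the edge case hiding behind the "unsigned results" wording:

    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    unsafe fn abs64_demo() {
        let v = _mm256_setr_epi64x(-1, 2, i64::MIN, -4);
        let r = _mm256_abs_epi64(v);
        let mut out = [0i64; 4];
        _mm256_storeu_si256(out.as_mut_ptr().cast(), r);
        // ABS(i64::MIN) = 2^63 does not fit in a signed lane; read back as
        // signed it wraps to i64::MIN again.
        assert_eq!(out, [1, 2, i64::MIN, 4]);
    }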
-Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): 32-bit elements at 256-bit (j := 0 to 7) and 128-bit (j := 0 to 3); 64-bit elements at 256-bit (j := 0 to 3) and 128-bit (j := 0 to 1); each in writemask and zeromask forms ("elements are zeroed out when the corresponding mask bit is not set").
-
-Operation, shown for the 32-bit 256-bit writemask form (zeromask forms set the ELSE arm to 0; 128-bit forms end with dst[MAX:128] := 0; 64-bit forms use i := j*64 and dst[i+63:i]):
-FOR j := 0 to 7
-    i := j*32
-    IF k[j]
-        dst[i+31:i] := a[i+31:i] + b[i+31:i]
-    ELSE
-        dst[i+31:i] := src[i+31:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
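The masked adds make per-lane conditional accumulation branch-free: lanes with a clear mask bit pass "src" through unchanged. A sketch under the assumed names _mm256_cmplt_epi64_mask and _mm256_mask_add_epi64, accumulating only lanes still below a limit:

    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    unsafe fn add_below_limit(acc: __m256i, step: __m256i) -> __m256i {
        let limit = _mm256_set1_epi64x(100);
        let k = _mm256_cmplt_epi64_mask(acc, limit); // bit set where acc < 100
        // unselected lanes are copied from `acc` (the writemask src) unchanged
        _mm256_mask_add_epi64(acc, k, acc, step)
    }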
-Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): signed 32-bit elements at 256-bit (j := 0 to 7) and 128-bit (j := 0 to 3), in writemask and zeromask forms; signed 64-bit elements at 256-bit (j := 0 to 3) and 128-bit (j := 0 to 1), in writemask, zeromask, and unmasked forms.
-
-Operation, shown for the 32-bit 256-bit writemask form (zeromask forms set the ELSE arm to 0; unmasked forms assign every element; 128-bit forms end with dst[MAX:128] := 0; 64-bit forms use i := j*64 and dst[i+63:i]):
-FOR j := 0 to 7
-    i := j*32
-    IF k[j]
-        dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
-    ELSE
-        dst[i+31:i] := src[i+31:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
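Packed 64-bit signed max has no SSE/AVX2 counterpart (vpmaxsd stops at 32-bit elements), so even the unmasked 64-bit form above is new here. A one-line lower clamp, assuming the conventional name _mm256_max_epi64:

    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    unsafe fn clamp_low(v: __m256i, floor: i64) -> __m256i {
        // lane-wise max against a broadcast floor
        _mm256_max_epi64(v, _mm256_set1_epi64x(floor))
    }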
-Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): unsigned 32-bit elements at 256-bit (j := 0 to 7) and 128-bit (j := 0 to 3), in writemask and zeromask forms; unsigned 64-bit elements at 256-bit (j := 0 to 3) and 128-bit (j := 0 to 1), in writemask, zeromask, and unmasked forms.
-
-Operation, shown for the 32-bit 256-bit writemask form (zeromask forms set the ELSE arm to 0; unmasked forms assign every element; 128-bit forms end with dst[MAX:128] := 0; 64-bit forms use i := j*64 and dst[i+63:i]):
-FOR j := 0 to 7
-    i := j*32
-    IF k[j]
-        dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
-    ELSE
-        dst[i+31:i] := src[i+31:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
-Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): signed 32-bit elements at 256-bit (j := 0 to 7) and 128-bit (j := 0 to 3), in writemask and zeromask forms; signed 64-bit elements at 256-bit (j := 0 to 3) and 128-bit (j := 0 to 1), in writemask, zeromask, and unmasked forms.
-
-Operation, shown for the 32-bit 256-bit writemask form (zeromask forms set the ELSE arm to 0; unmasked forms assign every element; 128-bit forms end with dst[MAX:128] := 0; 64-bit forms use i := j*64 and dst[i+63:i]):
-FOR j := 0 to 7
-    i := j*32
-    IF k[j]
-        dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
-    ELSE
-        dst[i+31:i] := src[i+31:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
-Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): unsigned 32-bit elements at 256-bit (j := 0 to 7) and 128-bit (j := 0 to 3), in writemask and zeromask forms; unsigned 64-bit elements at 256-bit (j := 0 to 3) and 128-bit (j := 0 to 1), in writemask, zeromask, and unmasked forms.
-
-Operation, shown for the 32-bit 256-bit writemask form (zeromask forms set the ELSE arm to 0; unmasked forms assign every element; 128-bit forms end with dst[MAX:128] := 0; 64-bit forms use i := j*64 and dst[i+63:i]):
-FOR j := 0 to 7
-    i := j*32
-    IF k[j]
-        dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
-    ELSE
-        dst[i+31:i] := src[i+31:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
-Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): 256-bit (j := 0 to 3) and 128-bit (j := 0 to 1), each in writemask and zeromask forms ("elements are zeroed out when the corresponding mask bit is not set").
-
-Operation, shown for the 256-bit writemask form (zeromask forms set the ELSE arm to 0; 128-bit forms end with dst[MAX:128] := 0):
-FOR j := 0 to 3
-    i := j*64
-    IF k[j]
-        dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i])
-    ELSE
-        dst[i+63:i] := src[i+63:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
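Only the low 32 bits of each 64-bit lane participate, i.e. viewed as eight 32-bit elements the even-indexed ones are multiplied into full 64-bit products. A sketch under the assumed name _mm256_mask_mul_epi32:

    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    unsafe fn widening_mul_demo() {
        // as i32 lanes, only indices 0, 2, 4, 6 take part (odd lanes are ignored)
        let a = _mm256_setr_epi32(-3, 999, 4, 999, -5, 999, 6, 999);
        let b = _mm256_setr_epi32(7, 999, -8, 999, 9, 999, 10, 999);
        let src = _mm256_set1_epi64x(-1);
        let r = _mm256_mask_mul_epi32(src, 0b1011, a, b); // 64-bit lane 2 masked off
        let mut out = [0i64; 4];
        _mm256_storeu_si256(out.as_mut_ptr().cast(), r);
        assert_eq!(out, [-21, -32, -1, 60]); // lane 2 copied from src
    }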
-Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): 256-bit (j := 0 to 7) and 128-bit (j := 0 to 3), each in writemask and zeromask forms.
-
-Operation, shown for the 256-bit writemask form (zeromask forms set the ELSE arm to 0; 128-bit forms end with dst[MAX:128] := 0):
-FOR j := 0 to 7
-    i := j*32
-    IF k[j]
-        tmp[63:0] := a[i+31:i] * b[i+31:i]
-        dst[i+31:i] := tmp[31:0]
-    ELSE
-        dst[i+31:i] := src[i+31:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
-
-Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): 256-bit (j := 0 to 3) and 128-bit (j := 0 to 1), each in writemask and zeromask forms.
-
-Operation, shown for the 256-bit writemask form (zeromask forms set the ELSE arm to 0; 128-bit forms end with dst[MAX:128] := 0):
-FOR j := 0 to 3
-    i := j*64
-    IF k[j]
-        dst[i+63:i] := a[i+31:i] * b[i+31:i]
-    ELSE
-        dst[i+63:i] := src[i+63:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
-Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): 32-bit elements at 256-bit (j := 0 to 7) and 128-bit (j := 0 to 3); 64-bit elements at 256-bit (j := 0 to 3) and 128-bit (j := 0 to 1); each in writemask and zeromask forms.
-
-Operation, shown for the 32-bit 256-bit writemask form (zeromask forms set the ELSE arm to 0; 128-bit forms end with dst[MAX:128] := 0; 64-bit forms use i := j*64 and dst[i+63:i]):
-FOR j := 0 to 7
-    i := j*32
-    IF k[j]
-        dst[i+31:i] := a[i+31:i] - b[i+31:i]
-    ELSE
-        dst[i+31:i] := src[i+31:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
-Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): pd 256-bit (j := 0 to 3, 64-bit elements), pd 128-bit (j := 0 to 1), ps 256-bit (j := 0 to 7, 32-bit elements), ps 128-bit (j := 0 to 3); each in writemask, zeromask, and unmasked forms.
-
-Operation, shown for the pd 256-bit writemask form (zeromask forms set the ELSE arm to 0; unmasked forms assign every element; 128-bit forms end with dst[MAX:128] := 0; ps forms use i := j*32 and dst[i+31:i]):
-FOR j := 0 to 3
-    i := j*64
-    IF k[j]
-        dst[i+63:i] := (1.0 / a[i+63:i])
-    ELSE
-        dst[i+63:i] := src[i+63:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
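The stated 2^-14 bound is on relative error, which is easy to verify against the exact reciprocal. A sketch assuming the stdarch name _mm256_rcp14_pd for the 256-bit pd form above (an assumption; the name is not preserved in this hunk):

    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    unsafe fn rcp14_error(x: f64) -> f64 {
        let approx = _mm256_rcp14_pd(_mm256_set1_pd(x));
        let mut out = [0.0f64; 4];
        _mm256_storeu_pd(out.as_mut_ptr(), approx);
        let exact = 1.0 / x;
        let rel_err = ((out[0] - exact) / exact).abs();
        debug_assert!(rel_err < 2f64.powi(-14)); // the bound promised above
        rel_err
    }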
-Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14.
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): pd 256-bit (j := 0 to 3, 64-bit elements), pd 128-bit (j := 0 to 1), ps 256-bit (j := 0 to 7, 32-bit elements), ps 128-bit (j := 0 to 3); each in unmasked, writemask ("elements are copied from "src" when the corresponding mask bit is not set"), and zeromask ("elements are zeroed out when the corresponding mask bit is not set") forms.
-
-Operation, shown for the pd 256-bit writemask form (unmasked forms assign every element; zeromask forms set the ELSE arm to 0; 128-bit forms end with dst[MAX:128] := 0; ps forms use i := j*32 and dst[i+31:i]):
-FOR j := 0 to 3
-    i := j*64
-    IF k[j]
-        dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
-    ELSE
-        dst[i+63:i] := src[i+63:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
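A 2^-14 reciprocal-square-root seed is usually refined with one Newton-Raphson step, y' = y*(1.5 - 0.5*x*y*y), which roughly doubles the number of correct bits. A sketch around the assumed name _mm256_rsqrt14_ps, using plain AVX arithmetic for the refinement:

    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    unsafe fn rsqrt_refined(x: __m256) -> __m256 {
        let y = _mm256_rsqrt14_ps(x); // seed, |relative error| < 2^-14
        // one Newton-Raphson step: y * (1.5 - 0.5 * x * y * y)
        let half_x = _mm256_mul_ps(_mm256_set1_ps(0.5), x);
        let y2 = _mm256_mul_ps(y, y);
        _mm256_mul_ps(y, _mm256_sub_ps(_mm256_set1_ps(1.5), _mm256_mul_ps(half_x, y2)))
    }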
-Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
-
-Variants (AVX512F + AVX512VL, immintrin.h, category Arithmetic): pd 256-bit (j := 0 to 3, 64-bit elements), pd 128-bit (j := 0 to 1), ps 256-bit (j := 0 to 7, 32-bit elements), ps 128-bit (j := 0 to 3); each in writemask and zeromask forms ("elements are zeroed out when the corresponding mask bit is not set").
-
-Operation, shown for the pd 256-bit writemask form (zeromask forms set the ELSE arm to 0; 128-bit forms end with dst[MAX:128] := 0; ps forms use i := j*32 and dst[i+31:i]):
-FOR j := 0 to 3
-    i := j*64
-    IF k[j]
-        dst[i+63:i] := a[i+63:i] - b[i+63:i]
-    ELSE
-        dst[i+63:i] := src[i+63:i]
-    FI
-ENDFOR
-dst[MAX:256] := 0
- - - - - - Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst". - -temp[511:256] := a[255:0] -temp[255:0] := b[255:0] -temp[511:0] := temp[511:0] >> (32*imm8[2:0]) -dst[255:0] := temp[255:0] -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - - Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -temp[511:256] := a[255:0] -temp[255:0] := b[255:0] -temp[511:0] := temp[511:0] >> (32*imm8[2:0]) -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := temp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
temp[511:256] := a[255:0]
temp[255:0] := b[255:0]
temp[511:0] := temp[511:0] >> (32*imm8[2:0])
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := temp[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst".
Operation:
temp[255:128] := a[127:0]
temp[127:0] := b[127:0]
temp[255:0] := temp[255:0] >> (32*imm8[1:0])
dst[127:0] := temp[127:0]
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
temp[255:128] := a[127:0]
temp[127:0] := b[127:0]
temp[255:0] := temp[255:0] >> (32*imm8[1:0])
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := temp[i+31:i]
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
temp[255:128] := a[127:0]
temp[127:0] := b[127:0]
temp[255:0] := temp[255:0] >> (32*imm8[1:0])
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := temp[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst".
Operation:
temp[511:256] := a[255:0]
temp[255:0] := b[255:0]
temp[511:0] := temp[511:0] >> (64*imm8[1:0])
dst[255:0] := temp[255:0]
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
temp[511:256] := a[255:0]
temp[255:0] := b[255:0]
temp[511:0] := temp[511:0] >> (64*imm8[1:0])
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := temp[i+63:i]
    ELSE
        dst[i+63:i] := src[i+63:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Concatenate "a" and "b" into a 64-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
temp[511:256] := a[255:0]
temp[255:0] := b[255:0]
temp[511:0] := temp[511:0] >> (64*imm8[1:0])
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := temp[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst".
Operation:
temp[255:128] := a[127:0]
temp[127:0] := b[127:0]
temp[255:0] := temp[255:0] >> (64*imm8[0])
dst[127:0] := temp[127:0]
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
temp[255:128] := a[127:0]
temp[127:0] := b[127:0]
temp[255:0] := temp[255:0] >> (64*imm8[0])
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := temp[i+63:i]
    ELSE
        dst[i+63:i] := src[i+63:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Concatenate "a" and "b" into a 32-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
temp[255:128] := a[127:0]
temp[127:0] := b[127:0]
temp[255:0] := temp[255:0] >> (64*imm8[0])
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := temp[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
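The ten entries above describe the "concatenate and shift right by elements" family. A minimal C sketch of the 256-bit 32-bit-element form follows; since this data has lost the prototypes, the intrinsic names `_mm256_alignr_epi32` / `_mm256_maskz_alignr_epi32` and the `-mavx512f -mavx512vl` build flags are inferred, not taken from the entries themselves.

#include <immintrin.h>
#include <stdio.h>

/* Assumed names for the 256-bit valign entries; compile with
   -mavx512f -mavx512vl on GCC/Clang. */
int main(void) {
    __m256i a = _mm256_setr_epi32(8, 9, 10, 11, 12, 13, 14, 15);
    __m256i b = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    /* temp = a:b (64 bytes) shifted right by 3 dwords -> 3,4,...,10 */
    __m256i r  = _mm256_alignr_epi32(a, b, 3);
    /* zeromask variant: keep only even lanes, zero the rest */
    __m256i rz = _mm256_maskz_alignr_epi32(0x55, a, b, 3);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int j = 0; j < 8; j++) printf("%d ", out[j]);  /* 3 4 5 6 7 8 9 10 */
    putchar('\n');
    _mm256_storeu_si256((__m256i *)out, rz);
    for (int j = 0; j < 8; j++) printf("%d ", out[j]);  /* 3 0 5 0 7 0 9 0 */
    putchar('\n');
    return 0;
}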
Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".
Operation:
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := b[i+63:i]
    ELSE
        dst[i+63:i] := a[i+63:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".
Operation:
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := b[i+63:i]
    ELSE
        dst[i+63:i] := a[i+63:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".
Operation:
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := b[i+31:i]
    ELSE
        dst[i+31:i] := a[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".
Operation:
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := b[i+31:i]
    ELSE
        dst[i+31:i] := a[i+31:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
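A short sketch of the floating-point mask blend described above; `_mm256_mask_blend_pd` is the presumed name of the first (256-bit double) entry, since the prototypes are missing here.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
    __m256d b = _mm256_setr_pd(10.0, 11.0, 12.0, 13.0);
    /* k = 0b0110: lanes 1 and 2 come from b, lanes 0 and 3 from a */
    __m256d r = _mm256_mask_blend_pd(0x6, a, b);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 11 12 3 */
    return 0;
}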
Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst".
Operation:
FOR j := 0 to 7
    i := j*32
    n := (j % 4)*32
    dst[i+31:i] := a[n+31:n]
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*32
    n := (j % 4)*32
    IF k[j]
        dst[i+31:i] := a[n+31:n]
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*32
    n := (j % 4)*32
    IF k[j]
        dst[i+31:i] := a[n+31:n]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst".
Operation:
FOR j := 0 to 7
    i := j*32
    n := (j % 4)*32
    dst[i+31:i] := a[n+31:n]
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*32
    n := (j % 4)*32
    IF k[j]
        dst[i+31:i] := a[n+31:n]
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*32
    n := (j % 4)*32
    IF k[j]
        dst[i+31:i] := a[n+31:n]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
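The entries above broadcast a whole 128-bit lane. A minimal sketch, assuming the conventional name `_mm256_broadcast_f32x4` for the first entry:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
    __m256 r = _mm256_broadcast_f32x4(a);   /* 1 2 3 4 1 2 3 4 */
    float out[8];
    _mm256_storeu_ps(out, r);
    for (int j = 0; j < 8; j++) printf("%g ", out[j]);
    putchar('\n');
    return 0;
}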
Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := a[63:0]
    ELSE
        dst[i+63:i] := src[i+63:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := a[63:0]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := a[31:0]
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := a[31:0]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := a[31:0]
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := a[31:0]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
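A sketch of the masked scalar broadcast above; `_mm256_maskz_broadcastss_ps` is assumed to be the zeromask single-precision 256-bit entry.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_setr_ps(7.0f, 0.0f, 0.0f, 0.0f);
    /* broadcast a[31:0] = 7.0 into lanes 0-3 only; lanes 4-7 are zeroed */
    __m256 r = _mm256_maskz_broadcastss_ps(0x0F, a);
    float out[8];
    _mm256_storeu_ps(out, r);
    for (int j = 0; j < 8; j++) printf("%g ", out[j]); /* 7 7 7 7 0 0 0 0 */
    putchar('\n');
    return 0;
}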
Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".
Operation:
size := 64
m := 0
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[m+size-1:m] := a[i+63:i]
        m := m + size
    FI
ENDFOR
dst[255:m] := src[255:m]
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.
Operation:
size := 64
m := 0
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[m+size-1:m] := a[i+63:i]
        m := m + size
    FI
ENDFOR
dst[255:m] := 0
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".
Operation:
size := 64
m := 0
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[m+size-1:m] := a[i+63:i]
        m := m + size
    FI
ENDFOR
dst[127:m] := src[127:m]
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.
Operation:
size := 64
m := 0
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[m+size-1:m] := a[i+63:i]
        m := m + size
    FI
ENDFOR
dst[127:m] := 0
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".
Operation:
size := 32
m := 0
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[m+size-1:m] := a[i+31:i]
        m := m + size
    FI
ENDFOR
dst[255:m] := src[255:m]
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.
Operation:
size := 32
m := 0
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[m+size-1:m] := a[i+31:i]
        m := m + size
    FI
ENDFOR
dst[255:m] := 0
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".
Operation:
size := 32
m := 0
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[m+size-1:m] := a[i+31:i]
        m := m + size
    FI
ENDFOR
dst[127:m] := src[127:m]
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.
Operation:
size := 32
m := 0
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[m+size-1:m] := a[i+31:i]
        m := m + size
    FI
ENDFOR
dst[127:m] := 0
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
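Compress is the classic "left-packing" primitive: lanes passing a predicate are packed to the front. A minimal sketch, assuming `_mm256_maskz_compress_ps` is the zeromask single-precision 256-bit entry above:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);
    /* 0xAA selects the odd lanes; they are packed to the front, rest zeroed */
    __m256 r = _mm256_maskz_compress_ps(0xAA, a);
    float out[8];
    _mm256_storeu_ps(out, r);
    for (int j = 0; j < 8; j++) printf("%g ", out[j]); /* 1 3 5 7 0 0 0 0 */
    putchar('\n');
    return 0;
}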
Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
m := 0
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := a[m+63:m]
        m := m + 64
    ELSE
        dst[i+63:i] := src[i+63:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
m := 0
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := a[m+63:m]
        m := m + 64
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
m := 0
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := a[m+63:m]
        m := m + 64
    ELSE
        dst[i+63:i] := src[i+63:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
m := 0
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := a[m+63:m]
        m := m + 64
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
m := 0
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := a[m+31:m]
        m := m + 32
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
m := 0
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := a[m+31:m]
        m := m + 32
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
m := 0
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := a[m+31:m]
        m := m + 32
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
m := 0
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := a[m+31:m]
        m := m + 32
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
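Expand is the inverse of compress: the low elements of "a" are scattered, in order, into the lanes selected by "k". A sketch assuming `_mm256_maskz_expand_ps` is the zeromask single-precision 256-bit entry:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_setr_ps(1, 2, 3, 4, 0, 0, 0, 0);
    /* scatter a[0..3] into the odd lanes, zero the even lanes */
    __m256 r = _mm256_maskz_expand_ps(0xAA, a);
    float out[8];
    _mm256_storeu_ps(out, r);
    for (int j = 0; j < 8; j++) printf("%g ", out[j]); /* 0 1 0 2 0 3 0 4 */
    putchar('\n');
    return 0;
}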
Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".
Operation:
CASE imm8[0] OF
0: dst[127:0] := a[127:0]
1: dst[127:0] := a[255:128]
ESAC
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
CASE imm8[0] OF
0: tmp[127:0] := a[127:0]
1: tmp[127:0] := a[255:128]
ESAC
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := tmp[i+31:i]
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
CASE imm8[0] OF
0: tmp[127:0] := a[127:0]
1: tmp[127:0] := a[255:128]
ESAC
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := tmp[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst".
Operation:
CASE imm8[0] OF
0: dst[127:0] := a[127:0]
1: dst[127:0] := a[255:128]
ESAC
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
CASE imm8[0] OF
0: tmp[127:0] := a[127:0]
1: tmp[127:0] := a[255:128]
ESAC
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := tmp[i+31:i]
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
CASE imm8[0] OF
0: tmp[127:0] := a[127:0]
1: tmp[127:0] := a[255:128]
ESAC
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := tmp[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
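A sketch of the plain 128-bit extraction above; `_mm256_extractf32x4_ps` is the likely name of the first entry.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);
    __m128 hi = _mm256_extractf32x4_ps(a, 1); /* imm8[0]=1 -> a[255:128] */
    float out[4];
    _mm_storeu_ps(out, hi);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 4 5 6 7 */
    return 0;
}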
The token table and helper below are shared by all of the double-precision fix-up operations that follow. (In the 64-bit helper, the final exception CASE dispatches on tsrc[63:0]; the element is 64 bits wide.)

enum TOKEN_TYPE {
    QNAN_TOKEN := 0,
    SNAN_TOKEN := 1,
    ZERO_VALUE_TOKEN := 2,
    ONE_VALUE_TOKEN := 3,
    NEG_INF_TOKEN := 4,
    POS_INF_TOKEN := 5,
    NEG_VALUE_TOKEN := 6,
    POS_VALUE_TOKEN := 7
}
DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
    tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
    CASE(tsrc[63:0]) OF
        QNAN_TOKEN: j := 0
        SNAN_TOKEN: j := 1
        ZERO_VALUE_TOKEN: j := 2
        ONE_VALUE_TOKEN: j := 3
        NEG_INF_TOKEN: j := 4
        POS_INF_TOKEN: j := 5
        NEG_VALUE_TOKEN: j := 6
        POS_VALUE_TOKEN: j := 7
    ESAC
    token_response[3:0] := src3[3+4*j:4*j]
    CASE(token_response[3:0]) OF
        0 : dest[63:0] := src1[63:0]
        1 : dest[63:0] := tsrc[63:0]
        2 : dest[63:0] := QNaN(tsrc[63:0])
        3 : dest[63:0] := QNAN_Indefinite
        4 : dest[63:0] := -INF
        5 : dest[63:0] := +INF
        6 : dest[63:0] := tsrc.sign? -INF : +INF
        7 : dest[63:0] := -0
        8 : dest[63:0] := +0
        9 : dest[63:0] := -1
        10: dest[63:0] := +1
        11: dest[63:0] := 1/2
        12: dest[63:0] := 90.0
        13: dest[63:0] := PI/2
        14: dest[63:0] := MAX_FLOAT
        15: dest[63:0] := -MAX_FLOAT
    ESAC
    CASE(tsrc[63:0]) OF
        ZERO_VALUE_TOKEN:
            IF (imm8[0]) #ZE; FI
        ZERO_VALUE_TOKEN:
            IF (imm8[1]) #IE; FI
        ONE_VALUE_TOKEN:
            IF (imm8[2]) #ZE; FI
        ONE_VALUE_TOKEN:
            IF (imm8[3]) #IE; FI
        SNAN_TOKEN:
            IF (imm8[4]) #IE; FI
        NEG_INF_TOKEN:
            IF (imm8[5]) #IE; FI
        NEG_VALUE_TOKEN:
            IF (imm8[6]) #IE; FI
        POS_INF_TOKEN:
            IF (imm8[7]) #IE; FI
    ESAC
    RETURN dest[63:0]
}

Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 3
    i := j*64
    dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
    ELSE
        dst[i+63:i] := a[i+63:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 1
    i := j*64
    dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
    ELSE
        dst[i+63:i] := a[i+63:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
The single-precision fix-up operations below share the analogous 32-bit helper (named FIXUPIMMPS here; it operates on 32-bit elements):

DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
    tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
    CASE(tsrc[31:0]) OF
        QNAN_TOKEN: j := 0
        SNAN_TOKEN: j := 1
        ZERO_VALUE_TOKEN: j := 2
        ONE_VALUE_TOKEN: j := 3
        NEG_INF_TOKEN: j := 4
        POS_INF_TOKEN: j := 5
        NEG_VALUE_TOKEN: j := 6
        POS_VALUE_TOKEN: j := 7
    ESAC
    token_response[3:0] := src3[3+4*j:4*j]
    CASE(token_response[3:0]) OF
        0 : dest[31:0] := src1[31:0]
        1 : dest[31:0] := tsrc[31:0]
        2 : dest[31:0] := QNaN(tsrc[31:0])
        3 : dest[31:0] := QNAN_Indefinite
        4 : dest[31:0] := -INF
        5 : dest[31:0] := +INF
        6 : dest[31:0] := tsrc.sign? -INF : +INF
        7 : dest[31:0] := -0
        8 : dest[31:0] := +0
        9 : dest[31:0] := -1
        10: dest[31:0] := +1
        11: dest[31:0] := 1/2
        12: dest[31:0] := 90.0
        13: dest[31:0] := PI/2
        14: dest[31:0] := MAX_FLOAT
        15: dest[31:0] := -MAX_FLOAT
    ESAC
    CASE(tsrc[31:0]) OF
        ZERO_VALUE_TOKEN:
            IF (imm8[0]) #ZE; FI
        ZERO_VALUE_TOKEN:
            IF (imm8[1]) #IE; FI
        ONE_VALUE_TOKEN:
            IF (imm8[2]) #ZE; FI
        ONE_VALUE_TOKEN:
            IF (imm8[3]) #IE; FI
        SNAN_TOKEN:
            IF (imm8[4]) #IE; FI
        NEG_INF_TOKEN:
            IF (imm8[5]) #IE; FI
        NEG_VALUE_TOKEN:
            IF (imm8[6]) #IE; FI
        POS_INF_TOKEN:
            IF (imm8[7]) #IE; FI
    ESAC
    RETURN dest[31:0]
}

Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 7
    i := j*32
    dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
    ELSE
        dst[i+31:i] := a[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 3
    i := j*32
    dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
    ELSE
        dst[i+31:i] := a[i+31:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
Operation:
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
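A worked sketch of the fix-up mechanism: each nibble of the table in "c" gives the response for one input token. Here zeros are rewritten to -1 (response 9 for token 2) while everything else passes through (response 1). The name `_mm256_fixupimm_pd` and the argument order are inferred, since the prototypes are missing from this data.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_set1_pd(-99.0);              /* src1 (used by response 0) */
    __m256d b = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0); /* values to classify */
    /* Nibble j answers token j; token 2 (ZERO_VALUE_TOKEN) -> response 9 (-1),
       all other tokens -> response 1 (pass the classified value through). */
    __m256i c = _mm256_set1_epi64x(0x11111911);
    __m256d r = _mm256_fixupimm_pd(a, b, c, 0);     /* imm8 = 0: no flag reporting */
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* -1 1 2 3 */
    return 0;
}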
Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 3
    i := j*64
    dst[i+63:i] := ConvertExpFP64(a[i+63:i])
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := ConvertExpFP64(a[i+63:i])
    ELSE
        dst[i+63:i] := src[i+63:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := ConvertExpFP64(a[i+63:i])
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 1
    i := j*64
    dst[i+63:i] := ConvertExpFP64(a[i+63:i])
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := ConvertExpFP64(a[i+63:i])
    ELSE
        dst[i+63:i] := src[i+63:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := ConvertExpFP64(a[i+63:i])
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 7
    i := j*32
    dst[i+31:i] := ConvertExpFP32(a[i+31:i])
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := ConvertExpFP32(a[i+31:i])
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := ConvertExpFP32(a[i+31:i])
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 3
    i := j*32
    dst[i+31:i] := ConvertExpFP32(a[i+31:i])
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := ConvertExpFP32(a[i+31:i])
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
Operation:
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := ConvertExpFP32(a[i+31:i])
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
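A sketch of the exponent extraction above; `_mm256_getexp_pd` is the presumed name of the first entry, returning floor(log2(|x|)) per lane as a double.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_setr_pd(1.0, 3.0, 8.0, 0.5);
    __m256d r = _mm256_getexp_pd(a);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 1 3 -1 */
    return 0;
}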
Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 3
    i := j*64
    dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
    ELSE
        dst[i+63:i] := src[i+63:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 3
    i := j*64
    IF k[j]
        dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 1
    i := j*64
    dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
    ELSE
        dst[i+63:i] := src[i+63:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 1
    i := j*64
    IF k[j]
        dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 7
    i := j*32
    dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 3
    i := j*32
    dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
[getmant_note]
Operation:
FOR j := 0 to 3
    i := j*32
    IF k[j]
        dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:128] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
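A sketch of mantissa normalization; the name `_mm256_getmant_pd` and the `_MM_MANT_NORM_1_2` / `_MM_MANT_SIGN_src` enum arguments are the presumed interface (the prototypes and the [getmant_note] text are missing from this data).

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_setr_pd(6.0, -6.0, 0.75, 1.0);
    /* normalize each mantissa into [1, 2), keeping the source sign */
    __m256d r = _mm256_getmant_pd(a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1.5 -1.5 1.5 1 */
    return 0;
}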
Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8".
Operation:
dst[255:0] := a[255:0]
CASE (imm8[0]) OF
0: dst[127:0] := b[127:0]
1: dst[255:128] := b[127:0]
ESAC
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
tmp[255:0] := a[255:0]
CASE (imm8[0]) OF
0: tmp[127:0] := b[127:0]
1: tmp[255:128] := b[127:0]
ESAC
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := tmp[i+31:i]
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
tmp[255:0] := a[255:0]
CASE (imm8[0]) OF
0: tmp[127:0] := b[127:0]
1: tmp[255:128] := b[127:0]
ESAC
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := tmp[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Copy "a" to "dst", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8".
Operation:
dst[255:0] := a[255:0]
CASE (imm8[0]) OF
0: dst[127:0] := b[127:0]
1: dst[255:128] := b[127:0]
ESAC
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
tmp[255:0] := a[255:0]
CASE (imm8[0]) OF
0: tmp[127:0] := b[127:0]
1: tmp[255:128] := b[127:0]
ESAC
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := tmp[i+31:i]
    ELSE
        dst[i+31:i] := src[i+31:i]
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.

Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
tmp[255:0] := a[255:0]
CASE (imm8[0]) OF
0: tmp[127:0] := b[127:0]
1: tmp[255:128] := b[127:0]
ESAC
FOR j := 0 to 7
    i := j*32
    IF k[j]
        dst[i+31:i] := tmp[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:256] := 0
CPUID Flags: AVX512F, AVX512VL. Header: immintrin.h. Category: Miscellaneous.
- - - - - - Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
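A minimal C sketch of the 256-bit masked blend above; the name _mm256_mask_blend_epi32 is an inference from the description, as the record gives no intrinsic name:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm256_mask_blend_epi32: name inferred from the record above */
    __m256i a = _mm256_set1_epi32(0);
    __m256i b = _mm256_set1_epi32(1);
    /* k = 0b10101010: odd elements come from b, even elements from a */
    __m256i r = _mm256_mask_blend_epi32(0xAA, a, b);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* 0 1 0 1 0 1 0 1 */
    printf("\n");
    return 0;
}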
- - - - - - Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := b[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := b[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
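A C sketch of the zero-masked 256-bit broadcast just described; _mm256_maskz_broadcastd_epi32 is the immintrin.h name that appears to match (an assumption; note the source is a 128-bit vector whose low element is broadcast):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm256_maskz_broadcastd_epi32: name inferred from the record above */
    __m128i a = _mm_set_epi32(0, 0, 0, 42);             /* low element is 42 */
    __m256i r = _mm256_maskz_broadcastd_epi32(0x0F, a); /* low four lanes get 42 */
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* 42 42 42 42 0 0 0 0 */
    printf("\n");
    return 0;
}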
- - - - - - Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 32 -m := 0 -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[m+size-1:m] := a[i+31:i] - m := m + size - FI -ENDFOR -dst[255:m] := src[255:m] -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 32 -m := 0 -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[m+size-1:m] := a[i+31:i] - m := m + size - FI -ENDFOR -dst[255:m] := 0 -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
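A C sketch of the zero-masked 256-bit compress above, which packs the selected elements down to the low end; the name _mm256_maskz_compress_epi32 is inferred from the description:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm256_maskz_compress_epi32: name inferred from the record above */
    __m256i a = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
    /* k = 0b10100101: pack elements 0, 2, 5, 7 down to the low end, zero the rest */
    __m256i r = _mm256_maskz_compress_epi32(0xA5, a);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* 10 12 15 17 0 0 0 0 */
    printf("\n");
    return 0;
}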
- - - - - - Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 32 -m := 0 -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[m+size-1:m] := a[i+31:i] - m := m + size - FI -ENDFOR -dst[127:m] := src[127:m] -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 32 -m := 0 -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[m+size-1:m] := a[i+31:i] - m := m + size - FI -ENDFOR -dst[127:m] := 0 -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 64 -m := 0 -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[m+size-1:m] := a[i+63:i] - m := m + size - FI -ENDFOR -dst[255:m] := src[255:m] -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 64 -m := 0 -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[m+size-1:m] := a[i+63:i] - m := m + size - FI -ENDFOR -dst[255:m] := 0 -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 64 -m := 0 -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[m+size-1:m] := a[i+63:i] - m := m + size - FI -ENDFOR -dst[127:m] := src[127:m] -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 64 -m := 0 -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[m+size-1:m] := a[i+63:i] - m := m + size - FI -ENDFOR -dst[127:m] := 0 -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - id := idx[i+2:i]*32 - IF k[j] - dst[i+31:i] := a[id+31:id] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - id := idx[i+2:i]*32 - IF k[j] - dst[i+31:i] := a[id+31:id] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - id := idx[i+2:i]*32 - dst[i+31:i] := a[id+31:id] -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
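A C sketch of the unmasked cross-lane shuffle just described; the name _mm256_permutexvar_epi32 is inferred from the description (note its argument order: the index vector comes first):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm256_permutexvar_epi32: name inferred from the record above */
    __m256i a   = _mm256_setr_epi32(0, 10, 20, 30, 40, 50, 60, 70);
    __m256i idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);  /* reverse order */
    __m256i r = _mm256_permutexvar_epi32(idx, a);             /* idx first, then a */
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* 70 60 50 40 30 20 10 0 */
    printf("\n");
    return 0;
}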
- - - - - - - Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - off := idx[i+2:i]*32 - IF k[j] - dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := idx[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - off := idx[i+2:i]*32 - IF k[j] - dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - off := idx[i+2:i]*32 - IF k[j] - dst[i+31:i] := (idx[i+3]) ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - off := idx[i+2:i]*32 - dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] -ENDFOR -dst[MAX:256] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
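A C sketch of the unmasked two-source shuffle above, where idx bit 3 of each element selects between the two sources; the name _mm256_permutex2var_epi32 is inferred from the description:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm256_permutex2var_epi32: name inferred from the record above */
    __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    __m256i b = _mm256_setr_epi32(100, 101, 102, 103, 104, 105, 106, 107);
    /* index values 0..7 select from a, 8..15 select from b (bit 3 is the selector) */
    __m256i idx = _mm256_setr_epi32(0, 8, 1, 9, 2, 10, 3, 11);
    __m256i r = _mm256_permutex2var_epi32(a, idx, b);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* 0 100 1 101 2 102 3 103 */
    printf("\n");
    return 0;
}

This is a useful building block for interleaving: the idx vector above zips the low halves of a and b.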
- - - - - - - Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - off := idx[i+1:i]*32 - IF k[j] - dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := idx[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - off := idx[i+1:i]*32 - IF k[j] - dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - off := idx[i+1:i]*32 - IF k[j] - dst[i+31:i] := (idx[i+2]) ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - off := idx[i+1:i]*32 - dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] -ENDFOR -dst[MAX:128] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - off := idx[i+1:i]*64 - IF k[j] - dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := idx[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - off := idx[i+1:i]*64 - IF k[j] - dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - off := idx[i+1:i]*64 - IF k[j] - dst[i+63:i] := (idx[i+2]) ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - off := idx[i+1:i]*64 - dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] -ENDFOR -dst[MAX:256] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
 - - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). -
FOR j := 0 to 1
	i := j*64
	off := idx[i]*64
	IF k[j]
		dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off]
	ELSE
		dst[i+63:i] := idx[i+63:i]
	FI
ENDFOR
dst[MAX:128] := 0
 -
 - AVX512F
 - AVX512VL
 -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - off := idx[i]*64 - IF k[j] - dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - off := idx[i]*64 - IF k[j] - dst[i+63:i] := (idx[i+1]) ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - off := idx[i]*64 - dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] -ENDFOR -dst[MAX:128] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - off := idx[i+2:i]*32 - IF k[j] - dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := idx[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - off := idx[i+2:i]*32 - IF k[j] - dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - off := idx[i+2:i]*32 - IF k[j] - dst[i+31:i] := (idx[i+3]) ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - off := idx[i+2:i]*32 - dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off] -ENDFOR -dst[MAX:256] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - off := idx[i+1:i]*32 - IF k[j] - dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := idx[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - off := idx[i+1:i]*32 - IF k[j] - dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - off := idx[i+1:i]*32 - IF k[j] - dst[i+31:i] := (idx[i+2]) ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - off := idx[i+1:i]*32 - dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off] -ENDFOR -dst[MAX:128] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - off := idx[i+1:i]*64 - IF k[j] - dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := idx[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - off := idx[i+1:i]*64 - IF k[j] - dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - off := idx[i+1:i]*64 - IF k[j] - dst[i+63:i] := (idx[i+2]) ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - off := idx[i+1:i]*64 - dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off] -ENDFOR -dst[MAX:256] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - off := idx[i]*64 - IF k[j] - dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := idx[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - off := idx[i]*64 - IF k[j] - dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - off := idx[i]*64 - IF k[j] - dst[i+63:i] := (idx[i+1]) ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - off := idx[i]*64 - dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off] -ENDFOR -dst[MAX:128] := 0 - - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI -IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI -IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI -IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI -IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI -IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI -IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI -IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI -IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI -IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI -IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI -IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI -IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI -IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI -IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI -IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI -IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
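The last four records cover the in-lane double-precision permutes (immediate and variable control, writemask and zeromask). A C sketch of the immediate writemask form; the name _mm256_mask_permute_pd is inferred from the description:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm256_mask_permute_pd: name inferred from the records above */
    __m256d src = _mm256_set1_pd(-1.0);
    __m256d a   = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
    /* imm8 = 0b0101 swaps the pair within each 128-bit lane;
       k = 0b0011 keeps only the low lane, the rest comes from src */
    __m256d r = _mm256_mask_permute_pd(src, 0x3, a, 0x5);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 2 1 -1 -1 */
    return 0;
}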
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) -tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) -tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) -tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) -tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) -tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) -tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) -tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) -tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) -tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) -tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) -tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) -tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) -tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) -tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) -tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) -tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - id := idx[i+1:i]*64 - IF k[j] - dst[i+63:i] := a[id+63:id] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - id := idx[i+1:i]*64 - IF k[j] - dst[i+63:i] := a[id+63:id] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - id := idx[i+1:i]*64 - dst[i+63:i] := a[id+63:id] -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
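Unlike the in-lane permutes above, the two preceding records move 64-bit elements across the 128-bit lane boundary. A C sketch of the immediate form; the name _mm256_permutex_pd is inferred from the description:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm256_permutex_pd: name inferred from the record above */
    __m256d a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
    /* imm8 = 0b00011011 = 0x1B reverses all four elements across lanes */
    __m256d r = _mm256_permutex_pd(a, 0x1B);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 4 3 2 1 */
    return 0;
}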
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - id := idx[i+2:i]*32 - IF k[j] - dst[i+31:i] := a[id+31:id] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - id := idx[i+2:i]*32 - IF k[j] - dst[i+31:i] := a[id+31:id] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
 - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". -
FOR j := 0 to 7
	i := j*32
	id := idx[i+2:i]*32
	dst[i+31:i] := a[id+31:id]
ENDFOR
dst[MAX:256] := 0
 -
 - AVX512F
 - AVX512VL
 -
immintrin.h
- Miscellaneous -
 - - - - - - - Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). -
DEFINE SELECT4(src, control) {
	CASE(control[1:0]) OF
	0:	tmp[63:0] := src[63:0]
	1:	tmp[63:0] := src[127:64]
	2:	tmp[63:0] := src[191:128]
	3:	tmp[63:0] := src[255:192]
	ESAC
	RETURN tmp[63:0]
}
tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
FOR j := 0 to 3
	i := j*64
	IF k[j]
		dst[i+63:i] := tmp_dst[i+63:i]
	ELSE
		dst[i+63:i] := src[i+63:i]
	FI
ENDFOR
dst[MAX:256] := 0
 -
 - AVX512F
 - AVX512VL
 -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - id := idx[i+1:i]*64 - IF k[j] - dst[i+63:i] := a[id+63:id] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - id := idx[i+1:i]*64 - IF k[j] - dst[i+63:i] := a[id+63:id] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - id := idx[i+1:i]*64 - dst[i+63:i] := a[id+63:id] -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[m+31:m] - m := m + 32 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[m+31:m] - m := m + 32 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
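"Load contiguous active" is the expand operation, the inverse of the compress shown earlier: it scatters the low elements of "a" into the positions selected by "k". A C sketch of the zero-masked 256-bit form; the name _mm256_maskz_expand_epi32 is inferred from the description:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm256_maskz_expand_epi32: name inferred from the record above */
    __m256i a = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
    /* k = 0b10100101: scatter the low elements of a (10, 11, 12, 13)
       into positions 0, 2, 5, 7; the unselected positions become zero */
    __m256i r = _mm256_maskz_expand_epi32(0xA5, a);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* 10 0 11 0 0 12 0 13 */
    printf("\n");
    return 0;
}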
- - - - - - Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[m+31:m] - m := m + 32 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[m+31:m] - m := m + 32 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[m+63:m] - m := m + 64 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[m+63:m] - m := m + 64 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[m+63:m] - m := m + 64 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[m+63:m] - m := m + 64 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
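A C sketch of the zero-masked in-lane dword shuffle just described; the name _mm256_maskz_shuffle_epi32 is inferred from the description, and the immediate is written as a plain integer (some headers type this parameter as _MM_PERM_ENUM, to which an integer constant converts in C):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm256_maskz_shuffle_epi32: name inferred from the record above */
    __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    /* 0x1B reverses the four dwords within each 128-bit lane;
       k = 0x0F zeroes the upper lane entirely */
    __m256i r = _mm256_maskz_shuffle_epi32(0x0F, a, 0x1B);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* 3 2 1 0 0 0 0 0 */
    printf("\n");
    return 0;
}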
- - - - - - - Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
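A C sketch of the zero-masked 256-bit high-dword interleave just described; the name _mm256_maskz_unpackhi_epi32 is inferred from the description:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm256_maskz_unpackhi_epi32: name inferred from the record above */
    __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    __m256i b = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
    /* k = 0x0F: keep the interleave of the low 128-bit lane, zero the upper lane */
    __m256i r = _mm256_maskz_unpackhi_epi32(0x0F, a, b);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);  /* 2 12 3 13 0 0 0 0 */
    printf("\n");
    return 0;
}

Note the per-lane behaviour: without the mask the result would be 2 12 3 13 6 16 7 17, i.e. the high dwords of each 128-bit lane interleaved independently.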
- - - - - - - Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Miscellaneous -
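The stripped markup drops the intrinsic prototypes, but the 256-bit zeromask dword form matches what immintrin.h exposes as _mm256_maskz_unpackhi_epi32 (an assumed mapping, worth verifying against the Intrinsics Guide). A minimal sketch under that assumption, compiled with -mavx512f -mavx512vl:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    __m256i b = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
    /* Interleave the high dwords of each 128-bit lane: unmasked this
       yields {2,12,3,13, 6,16,7,17}; zeromask 0x55 keeps elements
       0, 2, 4, 6 and zeroes the rest. */
    __m256i r = _mm256_maskz_unpackhi_epi32(0x55, a, b);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int j = 0; j < 8; j++)
        printf("%d ", out[j]);            /* 2 0 3 0 6 0 7 0 */
    printf("\n");
    return 0;
}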
Unpack and interleave 32-bit or 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
    dst[31:0] := src1[31:0]
    dst[63:32] := src2[31:0]
    dst[95:64] := src1[63:32]
    dst[127:96] := src2[63:32]
    RETURN dst[127:0]
}
DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
    dst[63:0] := src1[63:0]
    dst[127:64] := src2[63:0]
    RETURN dst[127:0]
}
tmp_dst is the per-128-bit-lane interleave of "a" and "b" (one lane for the 128-bit forms, two lanes for the 256-bit forms); the mask is then applied per element of width W over the N elements of the vector:
FOR j := 0 to N-1
    i := j*W
    IF k[j]
        dst[i+W-1:i] := tmp_dst[i+W-1:i]
    ELSE
        dst[i+W-1:i] := src[i+W-1:i]   // writemask forms; zeromask forms store 0
    FI
ENDFOR
dst[MAX:VL] := 0

Variants in this group (immintrin.h; AVX512F + AVX512VL; Miscellaneous):
- 32-bit integers, 256-bit, writemask and zeromask (N=8, W=32, VL=256)
- 32-bit integers, 128-bit, writemask and zeromask (N=4, W=32, VL=128)
- 64-bit integers, 256-bit, writemask and zeromask (N=4, W=64, VL=256)
- 64-bit integers, 128-bit, writemask and zeromask (N=2, W=64, VL=128)
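For the writemask behaviour, the 128-bit dword form appears to correspond to _mm_mask_unpacklo_epi32 (again an assumed name). A short sketch showing untouched elements coming from "src":

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i src = _mm_set1_epi32(-1);
    __m128i a = _mm_setr_epi32(1, 2, 3, 4);
    __m128i b = _mm_setr_epi32(5, 6, 7, 8);
    /* Unmasked interleave of the low dwords gives {1,5,2,6}; with
       writemask 0x3 only elements 0 and 1 are taken, elements 2 and 3
       are copied from src. */
    __m128i r = _mm_mask_unpacklo_epi32(src, 0x3, a, b);
    int out[4];
    _mm_storeu_si128((__m128i *)out, r);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* 1 5 -1 -1 */
    return 0;
}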
Round packed double-precision (64-bit) or single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst"; optionally using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]

DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
    m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
    tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
    IF IsInf(tmp[63:0])
        tmp[63:0] := src1[63:0]
    FI
    RETURN tmp[63:0]
}
DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
    m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
    tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
    IF IsInf(tmp[31:0])
        tmp[31:0] := src1[31:0]
    FI
    RETURN tmp[31:0]
}
Each element of width W (W=64 for FP64, W=32 for FP32) over the N elements of the vector is rounded, then the mask (if any) is applied:
FOR j := 0 to N-1
    i := j*W
    IF no mask OR k[j]
        dst[i+W-1:i] := RoundScaleFPW(a[i+W-1:i], imm8[7:0])
    ELSE
        dst[i+W-1:i] := src[i+W-1:i]   // writemask forms; zeromask forms store 0
    FI
ENDFOR
dst[MAX:VL] := 0

Variants in this group (immintrin.h; AVX512F + AVX512VL; Miscellaneous), each as writemask, zeromask, and unmasked forms:
- double-precision, 256-bit (N=4, VL=256) and 128-bit (N=2, VL=128)
- single-precision, 256-bit (N=8, VL=256) and 128-bit (N=4, VL=128)
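The unmasked 256-bit double-precision form looks like _mm256_roundscale_pd (assumed name). A sketch rounding to multiples of 0.5, i.e. one preserved fraction bit:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_setr_pd(1.3, -2.2, 3.76, 0.26);
    /* imm8[7:4] = 1 fraction bit (round to multiples of 0.5),
       imm8[3:0] = 0 (round to nearest even), so imm8 = 0x10. */
    __m256d r = _mm256_roundscale_pd(a, 0x10);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1.5 -2 4 0.5 */
    return 0;
}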
Scale the packed double-precision (64-bit) or single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst"; optionally using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

DEFINE SCALE(src1, src2) {
    IF (src2 == NaN)
        IF (src2 == SNaN)
            RETURN QNAN(src2)
        FI
    ELSE IF (src1 == NaN)
        IF (src1 == SNaN)
            RETURN QNAN(src1)
        FI
        IF (src2 != INF)
            RETURN QNAN(src1)
        FI
    ELSE
        tmp_src2 := src2
        tmp_src1 := src1
        IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
            tmp_src2 := 0
        FI
        IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
            tmp_src1 := 0
        FI
    FI
    RETURN tmp_src1 * POW(2.0, FLOOR(tmp_src2))
}
SCALE is applied element-wise, taking the corresponding element of width W (W=64 for FP64, W=32 for FP32) from both "a" and "b", then the mask (if any) is applied:
FOR j := 0 to N-1
    i := j*W
    IF no mask OR k[j]
        dst[i+W-1:i] := SCALE(a[i+W-1:i], b[i+W-1:i])
    ELSE
        dst[i+W-1:i] := src[i+W-1:i]   // writemask forms; zeromask forms store 0
    FI
ENDFOR
dst[MAX:VL] := 0

Variants in this group (immintrin.h; AVX512F + AVX512VL; Miscellaneous), each as writemask, zeromask, and unmasked forms:
- double-precision, 256-bit (N=4, VL=256) and 128-bit (N=2, VL=128)
- single-precision, 256-bit (N=8, VL=256) and 128-bit (N=4, VL=128)
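Assuming the unmasked 256-bit double-precision form is _mm256_scalef_pd (a hypothetical mapping for illustration), each element computes a * 2^FLOOR(b):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_setr_pd(1.0, 1.0, 3.0, -2.0);
    __m256d b = _mm256_setr_pd(3.0, -1.0, 0.5, 10.0);
    /* Element-wise a * 2^FLOOR(b): 1*2^3, 1*2^-1,
       3*2^0 (FLOOR(0.5) = 0), and -2*2^10. */
    __m256d r = _mm256_scalef_pd(a, b);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 8 0.5 3 -2048 */
    return 0;
}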
Shuffle 128-bit chunks (each composed of 4 single-precision (32-bit) floating-point elements, 2 double-precision (64-bit) floating-point elements, 4 32-bit integers, or 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst"; optionally using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

tmp_dst.m128[0] := a.m128[imm8[0]]
tmp_dst.m128[1] := b.m128[imm8[1]]
The unmasked forms store tmp_dst directly; the masked forms apply the mask per element of width W over the N elements:
FOR j := 0 to N-1
    i := j*W
    IF k[j]
        dst[i+W-1:i] := tmp_dst[i+W-1:i]
    ELSE
        dst[i+W-1:i] := src[i+W-1:i]   // writemask forms; zeromask forms store 0
    FI
ENDFOR
dst[MAX:256] := 0

Variants in this group (immintrin.h; AVX512F + AVX512VL; Miscellaneous), each as writemask, zeromask, and unmasked forms, all 256-bit:
- 4 single-precision floating-point elements per chunk (N=8, W=32)
- 2 double-precision floating-point elements per chunk (N=4, W=64)
- 4 32-bit integers per chunk (N=8, W=32)
- 2 64-bit integers per chunk (N=4, W=64)
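The unmasked 32-bit-integer form appears to be _mm256_shuffle_i32x4 (assumed name; "imm8" must be a compile-time constant). A minimal sketch:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    __m256i b = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
    /* imm8[0] picks the low 128-bit chunk from a, imm8[1] the high
       chunk from b; 0x3 selects the upper chunk of each source. */
    __m256i r = _mm256_shuffle_i32x4(a, b, 0x3);
    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int j = 0; j < 8; j++)
        printf("%d ", out[j]);            /* 4 5 6 7 14 15 16 17 */
    printf("\n");
    return 0;
}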
Shuffle double-precision (64-bit) or single-precision (32-bit) floating-point elements using the control in "imm8" (within 128-bit lanes for the 256-bit forms), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

For the double-precision forms, each destination qword alternately selects from "a" and "b" under one control bit:
tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
// 256-bit forms only:
tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192]
tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192]

For the single-precision forms, each 128-bit lane takes two dwords selected from "a" followed by two selected from "b":
DEFINE SELECT4(src, control) {
    CASE(control[1:0]) OF
    0: tmp[31:0] := src[31:0]
    1: tmp[31:0] := src[63:32]
    2: tmp[31:0] := src[95:64]
    3: tmp[31:0] := src[127:96]
    ESAC
    RETURN tmp[31:0]
}
tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4])
tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6])
// 256-bit forms only:
tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4])
tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6])

The mask is then applied per element of width W over the N elements:
FOR j := 0 to N-1
    i := j*W
    IF k[j]
        dst[i+W-1:i] := tmp_dst[i+W-1:i]
    ELSE
        dst[i+W-1:i] := src[i+W-1:i]   // writemask forms; zeromask forms store 0
    FI
ENDFOR
dst[MAX:VL] := 0

Variants in this group (immintrin.h; AVX512F + AVX512VL; Miscellaneous), each as writemask and zeromask forms:
- double-precision, 256-bit (N=4, W=64, VL=256) and 128-bit (N=2, W=64, VL=128)
- single-precision, 256-bit (N=8, W=32, VL=256) and 128-bit (N=4, W=32, VL=128)
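Assuming the 256-bit double-precision writemask form is _mm256_mask_shuffle_pd (a hypothetical mapping), the control and mask compose like this:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d src = _mm256_set1_pd(-1.0);
    __m256d a = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
    __m256d b = _mm256_setr_pd(10.0, 11.0, 12.0, 13.0);
    /* Control 0x5 (0b0101) selects, per 128-bit lane, the high qword
       of a and the low qword of b: unmasked tmp = {1,10,3,12}. The
       writemask 0xA (0b1010) keeps elements 1 and 3 and copies
       elements 0 and 2 from src. */
    __m256d r = _mm256_mask_shuffle_pd(src, 0xA, a, b, 0x5);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* -1 10 -1 12 */
    return 0;
}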
Unpack and interleave double-precision (64-bit) or single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
    dst[63:0] := src1[127:64]
    dst[127:64] := src2[127:64]
    RETURN dst[127:0]
}
DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
    dst[31:0] := src1[95:64]
    dst[63:32] := src2[95:64]
    dst[95:64] := src1[127:96]
    dst[127:96] := src2[127:96]
    RETURN dst[127:0]
}
tmp_dst is the per-128-bit-lane interleave of "a" and "b" (one lane for the 128-bit forms, two lanes for the 256-bit forms); the mask is then applied per element of width W over the N elements:
FOR j := 0 to N-1
    i := j*W
    IF k[j]
        dst[i+W-1:i] := tmp_dst[i+W-1:i]
    ELSE
        dst[i+W-1:i] := src[i+W-1:i]   // writemask forms; zeromask forms store 0
    FI
ENDFOR
dst[MAX:VL] := 0

Variants in this group (immintrin.h; AVX512F + AVX512VL; Miscellaneous), each as writemask and zeromask forms:
- double-precision, 256-bit (N=4, W=64, VL=256) and 128-bit (N=2, W=64, VL=128)
- single-precision, 256-bit (N=8, W=32, VL=256) and 128-bit (N=4, W=32, VL=128)
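Assuming the 256-bit double-precision writemask form is _mm256_mask_unpackhi_pd (assumed name), a short sketch:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d src = _mm256_set1_pd(-1.0);
    __m256d a = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
    __m256d b = _mm256_setr_pd(10.0, 11.0, 12.0, 13.0);
    /* Per 128-bit lane the high qwords interleave to {1,11,3,13};
       writemask 0x6 (0b0110) keeps elements 1 and 2 and copies
       elements 0 and 3 from src. */
    __m256d r = _mm256_mask_unpackhi_pd(src, 0x6, a, b);
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* -1 11 3 -1 */
    return 0;
}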
Unpack and interleave double-precision (64-bit) or single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
    dst[63:0] := src1[63:0]
    dst[127:64] := src2[63:0]
    RETURN dst[127:0]
}
DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
    dst[31:0] := src1[31:0]
    dst[63:32] := src2[31:0]
    dst[95:64] := src1[63:32]
    dst[127:96] := src2[63:32]
    RETURN dst[127:0]
}
tmp_dst is the per-128-bit-lane interleave of "a" and "b" (one lane for the 128-bit forms, two lanes for the 256-bit forms); the mask is then applied per element of width W over the N elements:
FOR j := 0 to N-1
    i := j*W
    IF k[j]
        dst[i+W-1:i] := tmp_dst[i+W-1:i]
    ELSE
        dst[i+W-1:i] := src[i+W-1:i]   // writemask forms; zeromask forms store 0
    FI
ENDFOR
dst[MAX:VL] := 0

Variants in this group (immintrin.h; AVX512F + AVX512VL; Miscellaneous), each as writemask and zeromask forms:
- double-precision, 256-bit (N=4, W=64, VL=256) and 128-bit (N=2, W=64, VL=128)
- single-precision, 256-bit (N=8, W=32, VL=256) and 128-bit (N=4, W=32, VL=128)
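Assuming the 128-bit single-precision zeromask form is _mm_maskz_unpacklo_ps (assumed name), a short sketch:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
    __m128 b = _mm_setr_ps(5.0f, 6.0f, 7.0f, 8.0f);
    /* The low floats interleave to {1,5,2,6}; zeromask 0x9 (0b1001)
       keeps elements 0 and 3 and zeroes elements 1 and 2. */
    __m128 r = _mm_maskz_unpacklo_ps(0x9, a, b);
    float out[4];
    _mm_storeu_ps(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 0 0 6 */
    return 0;
}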
Compare packed double-precision (64-bit) or single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k"; the masked forms additionally take a zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).

CASE (imm8[4:0]) OF
0: OP := _CMP_EQ_OQ
1: OP := _CMP_LT_OS
2: OP := _CMP_LE_OS
3: OP := _CMP_UNORD_Q
4: OP := _CMP_NEQ_UQ
5: OP := _CMP_NLT_US
6: OP := _CMP_NLE_US
7: OP := _CMP_ORD_Q
8: OP := _CMP_EQ_UQ
9: OP := _CMP_NGE_US
10: OP := _CMP_NGT_US
11: OP := _CMP_FALSE_OQ
12: OP := _CMP_NEQ_OQ
13: OP := _CMP_GE_OS
14: OP := _CMP_GT_OS
15: OP := _CMP_TRUE_UQ
16: OP := _CMP_EQ_OS
17: OP := _CMP_LT_OQ
18: OP := _CMP_LE_OQ
19: OP := _CMP_UNORD_S
20: OP := _CMP_NEQ_US
21: OP := _CMP_NLT_UQ
22: OP := _CMP_NLE_UQ
23: OP := _CMP_ORD_S
24: OP := _CMP_EQ_US
25: OP := _CMP_NGE_UQ
26: OP := _CMP_NGT_UQ
27: OP := _CMP_FALSE_OS
28: OP := _CMP_NEQ_OS
29: OP := _CMP_GE_OQ
30: OP := _CMP_GT_OQ
31: OP := _CMP_TRUE_US
ESAC
FOR j := 0 to N-1
    i := j*W
    IF no mask OR k1[j]
        k[j] := ( a[i+W-1:i] OP b[i+W-1:i] ) ? 1 : 0
    ELSE
        k[j] := 0
    FI
ENDFOR
k[MAX:N] := 0

Variants in this group (immintrin.h; AVX512F + AVX512VL; Compare), each as unmasked and zeromasked forms:
- double-precision, 256-bit (N=4, W=64) and 128-bit (N=2, W=64)
- single-precision, 256-bit (N=8, W=32) and 128-bit (N=4, W=32)
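The unmasked 256-bit double-precision form matches what immintrin.h exposes as _mm256_cmp_pd_mask (an assumed mapping). A sketch using a predicate from the table above:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_setr_pd(1.0, 5.0, 3.0, 7.0);
    __m256d b = _mm256_setr_pd(2.0, 4.0, 3.0, 8.0);
    /* _CMP_LT_OQ is predicate 17 in the table above; bit j of the
       returned mask is set when a[j] < b[j]. */
    __mmask8 m = _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ);
    printf("0x%x\n", (unsigned)m);        /* 0x9: elements 0 and 3 */
    return 0;
}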
Compare packed signed 32-bit integers in "a" and "b", and store the results in mask vector "k"; the masked forms additionally take a zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). The general forms select the predicate via "imm8"; the named forms fix it to one comparison.

CASE (imm8[2:0]) OF
0: OP := _MM_CMPINT_EQ
1: OP := _MM_CMPINT_LT
2: OP := _MM_CMPINT_LE
3: OP := _MM_CMPINT_FALSE
4: OP := _MM_CMPINT_NE
5: OP := _MM_CMPINT_NLT
6: OP := _MM_CMPINT_NLE
7: OP := _MM_CMPINT_TRUE
ESAC
FOR j := 0 to 7
    i := j*32
    IF no mask OR k1[j]
        k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
    ELSE
        k[j] := 0
    FI
ENDFOR
k[MAX:8] := 0

Variants in this group (immintrin.h; AVX512F + AVX512VL; Compare), all 256-bit (8 elements):
- "imm8" predicate: unmasked and zeromasked forms
- fixed predicates ==, >=, >, <=, <, != : unmasked forms
- fixed predicates ==, >=, > : zeromasked forms
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 3 - i := j*32 - k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 3 - i := j*32 - k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 3 - i := j*32 - k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". - -FOR j := 0 to 3 - i := j*32 - k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 3 - i := j*32 - k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 3 - i := j*32 - k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 3 - i := j*32 - k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - - - Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 3 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:4] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Compare -
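As a hedged C sketch of how the "imm8" and fixed-relation forms above relate,
assuming the published names `_mm256_cmp_epi32_mask` and
`_mm256_cmplt_epi32_mask`:

    #include <immintrin.h>
    #include <assert.h>

    /* The imm8 form with _MM_CMPINT_LT computes the same mask as the fixed
       less-than form, per the shared operation above. */
    void demo_epi32_cmp(void) {
        __m256i a = _mm256_setr_epi32(-3, -1, 0, 2, 5, 7, 9, 11);
        __m256i b = _mm256_set1_epi32(4);
        __mmask8 k_imm = _mm256_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
        __mmask8 k_lt  = _mm256_cmplt_epi32_mask(a, b);
        assert(k_imm == k_lt && k_imm == 0x0F);      /* lanes 0..3 are < 4 */
    }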
Compare packed signed 64-bit integers in "a" and "b", and store the results in
mask vector "k". The same fixed-relation, "imm8", and zeromask "k1" variants as
the signed 32-bit family above apply, with 64-bit lanes (i := j*64, operating on
a[i+63:i] and b[i+63:i]) and element counts N = 4 (256-bit) or N = 2 (128-bit).

  Header: immintrin.h   CPUID: AVX512F, AVX512VL   Category: Compare
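A minimal hedged sketch of the zeromasked 64-bit greater-than form, assuming
the published name `_mm_mask_cmpgt_epi64_mask`:

    #include <immintrin.h>

    /* Zeromasked 64-bit greater-than: lane 1 is suppressed by k1, so only
       lane 0 (-10 > 0, false) contributes; the result is 0. */
    __mmask8 demo_epi64_gt(void) {
        __m128i a  = _mm_set_epi64x(10, -10);        /* lane1 = 10, lane0 = -10 */
        __m128i b  = _mm_setzero_si128();
        __mmask8 k1 = 0x1;
        return _mm_mask_cmpgt_epi64_mask(k1, a, b);  /* 0x0 */
    }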
Compare packed unsigned 32-bit integers in "a" and "b", and store the results
in mask vector "k". Identical in structure to the signed 32-bit family above
(fixed relations, "imm8" form, and zeromask "k1" variants; N = 8 or 4), but the
comparison is unsigned.

  Header: immintrin.h   CPUID: AVX512F, AVX512VL   Category: Compare
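A hedged C sketch of why the signed and unsigned families are distinct,
assuming the published names `_mm256_cmplt_epi32_mask` and
`_mm256_cmplt_epu32_mask`:

    #include <immintrin.h>

    /* 0xFFFFFFFF is -1 under signed comparison but UINT_MAX under unsigned
       comparison, so the two less-than families disagree on such lanes. */
    void demo_signedness(void) {
        __m256i a = _mm256_set1_epi32(-1);           /* bit pattern 0xFFFFFFFF */
        __m256i b = _mm256_set1_epi32(1);
        __mmask8 ks = _mm256_cmplt_epi32_mask(a, b); /* -1 < 1       -> 0xff */
        __mmask8 ku = _mm256_cmplt_epu32_mask(a, b); /* UINT_MAX < 1 -> 0x00 */
        (void)ks; (void)ku;
    }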
Compare packed unsigned 64-bit integers in "a" and "b", and store the results
in mask vector "k". Identical in structure to the signed 64-bit family above
(fixed relations, "imm8" form, and zeromask "k1" variants; 64-bit lanes,
N = 4 or 2), but the comparison is unsigned.

  Header: immintrin.h   CPUID: AVX512F, AVX512VL   Category: Compare
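A hedged sketch of the "imm8" encodings: the table has no direct GT or GE
entry, so greater-than is expressed as _MM_CMPINT_NLE. Assuming the published
name `_mm_cmp_epu64_mask`:

    #include <immintrin.h>

    /* _MM_CMPINT_NLE ("not less-or-equal") acts as unsigned greater-than. */
    __mmask8 demo_epu64_nle(void) {
        __m128i a = _mm_set_epi64x(5, 0);
        __m128i b = _mm_set_epi64x(4, 9);
        return _mm_cmp_epu64_mask(a, b, _MM_CMPINT_NLE); /* lane1: 5 > 4 -> 0x2 */
    }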
Compute the bitwise AND of packed 32-bit or 64-bit integers in "a" and "b",
producing intermediate values, and set the corresponding bit in result mask "k"
if the intermediate value is non-zero. Unmasked forms and forms subject to
writemask "k1" (mask bits that are not set yield 0) exist for 32-bit lanes with
N = 8 (256-bit) or 4 (128-bit), and 64-bit lanes with N = 4 (256-bit) or 2
(128-bit).

  Operation (W = lane width; the unmasked forms always take the IF branch):
    FOR j := 0 to N-1
      i := j*W
      IF k1[j]
        k[j] := ((a[i+W-1:i] AND b[i+W-1:i]) != 0) ? 1 : 0
      ELSE
        k[j] := 0
      FI
    ENDFOR
    k[MAX:N] := 0

  Header: immintrin.h   CPUID: AVX512F, AVX512VL   Category: Compare
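A hedged C sketch of the unmasked 32-bit form, assuming the published name
`_mm256_test_epi32_mask`:

    #include <immintrin.h>

    /* Lane-wise overlap test: a mask bit is set where (a AND b) is non-zero. */
    __mmask8 demo_test(void) {
        __m256i flags = _mm256_setr_epi32(1, 2, 4, 8, 0, 3, 0, 5);
        __m256i probe = _mm256_set1_epi32(1);
        return _mm256_test_epi32_mask(flags, probe); /* lanes 0,5,7 -> 0xa1 */
    }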
Compute the bitwise NAND of packed 32-bit or 64-bit integers in "a" and "b",
producing intermediate values, and set the corresponding bit in result mask "k"
if the intermediate value is zero. The same unmasked and writemask-"k1"
variants and lane/count combinations as the bitwise-AND test family above
apply; only the per-lane condition is inverted:

  Operation (per lane):
    k[j] := ((a[i+W-1:i] AND b[i+W-1:i]) == 0) ? 1 : 0

  Header: immintrin.h   CPUID: AVX512F, AVX512VL   Category: Compare
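A hedged sketch showing the complement relationship, assuming the published
names `_mm256_test_epi32_mask` and `_mm256_testn_epi32_mask`:

    #include <immintrin.h>

    /* testn is the lane-wise complement of test (on unmasked lanes). */
    void demo_testn(void) {
        __m256i a = _mm256_setr_epi32(1, 0, 2, 0, 3, 0, 4, 0);
        __m256i ones = _mm256_set1_epi32(-1);        /* a AND ones == a */
        __mmask8 t  = _mm256_test_epi32_mask(a, ones);   /* 0x55 */
        __mmask8 tn = _mm256_testn_epi32_mask(a, ones);  /* 0xaa */
        (void)t; (void)tn;
    }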
Contiguously store the active double-precision (64-bit) or single-precision
(32-bit) floating-point elements in "a" (those with their respective bit set in
writemask "k") to unaligned memory at "base_addr". Variants: float64 with N = 4
(256-bit) or 2 (128-bit); float32 with N = 8 (256-bit) or 4 (128-bit).

  Operation (size = W = lane width in bits):
    size := W
    m := base_addr
    FOR j := 0 to N-1
      i := j*W
      IF k[j]
        MEM[m+size-1:m] := a[i+W-1:i]
        m := m + size
      FI
    ENDFOR

  Header: immintrin.h   CPUID: AVX512F, AVX512VL   Category: Store
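A hedged compare-then-compress sketch, assuming the published names
`_mm256_cmp_ps_mask` and `_mm256_mask_compressstoreu_ps`:

    #include <immintrin.h>

    /* Keep the elements >= 0 and pack them densely at the start of out. */
    void compress_nonneg(float out[8], __m256 a) {
        __mmask8 keep = _mm256_cmp_ps_mask(a, _mm256_setzero_ps(), _CMP_GE_OQ);
        _mm256_mask_compressstoreu_ps(out, keep, a); /* popcount(keep) written */
    }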
Store packed elements from "a" into memory using writemask "k". Variants:
double-precision (64-bit) floating-point, single-precision (32-bit)
floating-point, 32-bit integers, and 64-bit integers, each in 256-bit and
128-bit forms. "mem_addr" must be aligned on a 32-byte (256-bit forms) or
16-byte (128-bit forms) boundary or a general-protection exception may be
generated.

  Operation (W = lane width, N = lane count):
    FOR j := 0 to N-1
      i := j*W
      IF k[j]
        MEM[mem_addr+i+W-1:mem_addr+i] := a[i+W-1:i]
      FI
    ENDFOR

  Header: immintrin.h   CPUID: AVX512F, AVX512VL   Category: Store
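A hedged sketch of the aligned 32-bit integer form, assuming the published
name `_mm256_mask_store_epi32`:

    #include <immintrin.h>
    #include <stdalign.h>

    /* Masked aligned store: the 256-bit form requires 32-byte alignment. */
    void demo_mask_store(void) {
        alignas(32) int dst[8] = {0};
        _mm256_mask_store_epi32(dst, 0xA5, _mm256_set1_epi32(7)); /* lanes 0,2,5,7 */
    }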
Store packed elements from "a" into memory using writemask "k". Variants:
32-bit integers (N = 8 or 4), 64-bit integers (N = 4 or 2), double-precision
(64-bit) floating-point (N = 4 or 2), and single-precision (32-bit)
floating-point (N = 8 or 4). "mem_addr" does not need to be aligned on any
particular boundary. The operation is the same as for the aligned masked
stores above.

  Header: immintrin.h   CPUID: AVX512F, AVX512VL   Category: Store
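A hedged sketch of the classic use of the unaligned masked store, remainder
handling at the end of an array, assuming the published name
`_mm256_mask_storeu_epi32`:

    #include <immintrin.h>

    /* Write only the first `rem` lanes of v, without touching the rest. */
    void store_tail(int *dst, __m256i v, unsigned rem) {  /* rem in 0..8 */
        __mmask8 k = (__mmask8)((rem >= 8) ? 0xFFu : ((1u << rem) - 1u));
        _mm256_mask_storeu_epi32(dst, k, v);
    }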
Contiguously store the active 32-bit or 64-bit integers in "a" (those with
their respective bit set in writemask "k") to unaligned memory at "base_addr".
Variants: 32-bit lanes with N = 8 (256-bit) or 4 (128-bit); 64-bit lanes with
N = 4 (256-bit) or 2 (128-bit). The operation is the same as for the
floating-point compress stores above.

  Header: immintrin.h   CPUID: AVX512F, AVX512VL   Category: Store
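A hedged integer variant of the compress-store pattern, assuming the published
names `_mm256_cmpgt_epi32_mask` and `_mm256_mask_compressstoreu_epi32`
(`__builtin_popcount` is a GCC/Clang builtin, assumed here):

    #include <immintrin.h>

    /* Left-pack the strictly positive lanes of v into out; returns the count. */
    int compress_positive(int *out, __m256i v) {
        __mmask8 pos = _mm256_cmpgt_epi32_mask(v, _mm256_setzero_si256());
        _mm256_mask_compressstoreu_epi32(out, pos, v);
        return __builtin_popcount((unsigned)pos);
    }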
Scatter 32-bit or 64-bit integers from "a" into memory using 32-bit indices.
Elements are stored at addresses starting at "base_addr" and offset by each
32-bit element in "vindex" (each index is scaled by the factor in "scale").
"scale" should be 1, 2, 4 or 8. Masked variants are subject to mask "k"
(elements are not stored when the corresponding mask bit is not set).
Variants: 32-bit data with N = 8 (256-bit) or 4 (128-bit); 64-bit data with
N = 4 (256-bit) or 2 (128-bit).

  Operation (W = data lane width; MEM[] is bit-indexed, hence the final *8
  on the byte offset; unmasked forms store unconditionally):
    FOR j := 0 to N-1
      i := j*W
      m := j*32
      IF k[j]
        addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
        MEM[addr+W-1:addr] := a[i+W-1:i]
      FI
    ENDFOR

  Header: immintrin.h   CPUID: AVX512F, AVX512VL   Category: Store
- - - - - - - Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*32 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
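The integer-scatter entries above correspond to the _mm256_i32scatter_epi32 family, with _mm256_mask_i32scatter_epi32 and relatives for the masked forms. A sketch with illustrative indices; note that "scale" must be an immediate of 1, 2, 4 or 8, and with 32-bit elements a scale of 4 makes each vindex lane an element index:

#include <immintrin.h>

void scatter_demo(int *table) {
    __m256i idx = _mm256_setr_epi32(7, 0, 3, 1, 6, 2, 5, 4);
    __m256i val = _mm256_setr_epi32(70, 0, 30, 10, 60, 20, 50, 40);
    _mm256_i32scatter_epi32(table, idx, val, 4);  /* table[7]=70, table[0]=0, ... */
    /* The masked form skips lanes whose mask bit is clear: */
    _mm256_mask_i32scatter_epi32(table, (__mmask8)0x0F, idx, val, 4);
}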
- - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. -
-FOR j := 0 to 3
- i := j*32
- m := j*64
- addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
- MEM[addr+31:addr] := a[i+31:i]
-ENDFOR
- - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. -
-FOR j := 0 to 1
- i := j*32
- m := j*64
- addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
- MEM[addr+31:addr] := a[i+31:i]
-ENDFOR
- - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Store -
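The floating-point scatters behave identically; only the element type changes. For instance, _mm256_i32scatter_pd takes four 32-bit indices in an __m128i and four doubles. An illustrative sketch:

#include <immintrin.h>

void scatter_pd_demo(double *table) {
    __m128i idx = _mm_setr_epi32(3, 1, 0, 2);
    __m256d v   = _mm256_setr_pd(3.0, 1.0, 0.0, 2.0); /* lane j goes to table[idx[j]] */
    _mm256_i32scatter_pd(table, idx, v, 8);           /* scale 8 = sizeof(double) */
}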
- - - - - Store 256-bits (composed of 4 packed 64-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 8 packed 32-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - Store 128-bits (composed of 2 packed 64-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+127:mem_addr] := a[127:0] - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - Store 128-bits (composed of 4 packed 32-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+127:mem_addr] := a[127:0] - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 4 packed 64-bit integers) from "a" into memory. - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 8 packed 32-bit integers) from "a" into memory. - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - Store 128-bits (composed of 2 packed 64-bit integers) from "a" into memory. - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+127:mem_addr] := a[127:0] - - - AVX512F - AVX512VL -
immintrin.h
- Store -
- - - - - Store 128-bits (composed of 4 packed 32-bit integers) from "a" into memory. - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+127:mem_addr] := a[127:0] - - - AVX512F - AVX512VL -
immintrin.h
- Store -
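The plain stores come in unaligned (storeu) and aligned (store) flavors; only the latter requires the 32-byte (256-bit) or 16-byte (128-bit) boundary the entries describe. A sketch:

#include <immintrin.h>

void store_demo(void) {
    _Alignas(32) int aligned_buf[8];    /* C11 alignment specifier */
    int plain_buf[8];
    __m256i v = _mm256_set1_epi32(42);
    _mm256_store_epi32(aligned_buf, v); /* requires 32-byte alignment */
    _mm256_storeu_epi32(plain_buf, v);  /* any alignment is fine */
}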
- - - - - - Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - m := j*64 - IF k[j] - dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) - ELSE - dst[m+63:m] := src[m+63:m] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - m := j*64 - IF k[j] - dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) - ELSE - dst[m+63:m] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*32 - m := j*64 - IF k[j] - dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) - ELSE - dst[m+63:m] := src[m+63:m] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*32 - m := j*64 - IF k[j] - dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) - ELSE - dst[m+63:m] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
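These widening int32 conversions correspond to _mm256_mask_cvtepi32_pd / _mm256_maskz_cvtepi32_ps and their 128-bit siblings. A sketch of the writemask form, where inactive lanes pass src through unchanged (the mask value is illustrative):

#include <immintrin.h>

__m256d widen_demo(__m256d src, __m128i ints) {
    return _mm256_mask_cvtepi32_pd(src, (__mmask8)0x9, ints); /* convert lanes 0 and 3 */
}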
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
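The double-precision narrowing conversions above (to int32, to float, and to unsigned int32) all produce 128-bit results from a 256-bit source, e.g.:

#include <immintrin.h>

void pd_narrow_demo(__m256d a, __m128 *ps, __m128i *u32) {
    *ps  = _mm256_maskz_cvtpd_ps((__mmask8)0x7, a); /* lane 3 zeroed, per the zeromask form */
    *u32 = _mm256_cvtpd_epu32(a);                   /* unsigned conversion */
}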
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -FOR j := 0 to 7 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -FOR j := 0 to 7 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -FOR j := 0 to 7 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -FOR j := 0 to 7 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -FOR j := 0 to 3 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -FOR j := 0 to 3 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -FOR j := 0 to 3 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -FOR j := 0 to 3 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
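The FP32-to-FP16 entries match the masked _mm256_mask_cvtps_ph / _mm256_maskz_cvtps_ph forms, whose imm8 argument selects the rounding mode ([round_imm_note]). A round-trip sketch:

#include <immintrin.h>

__m256 f16_roundtrip(__m256 a, __m128i src, __mmask8 k) {
    __m128i half = _mm256_mask_cvtps_ph(src, k, a,
                       _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm256_cvtph_ps(half); /* widen all eight half floats back to FP32 */
}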
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 1 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). -
-FOR j := 0 to 7
- i := 32*j
- IF k[j]
- dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
- ELSE
- dst[i+31:i] := src[i+31:i]
- FI
-ENDFOR
-dst[MAX:256] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). -
-FOR j := 0 to 7
- i := 32*j
- IF k[j]
- dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
- ELSE
- dst[i+31:i] := 0
- FI
-ENDFOR
-dst[MAX:256] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). -
-FOR j := 0 to 3
- i := 32*j
- IF k[j]
- dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
- ELSE
- dst[i+31:i] := src[i+31:i]
- FI
-ENDFOR
-dst[MAX:128] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). -
-FOR j := 0 to 3
- i := 32*j
- IF k[j]
- dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
- ELSE
- dst[i+31:i] := 0
- FI
-ENDFOR
-dst[MAX:128] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
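The cvtt* entries differ from their cvt* counterparts only in always truncating toward zero instead of honoring the current rounding mode:

#include <immintrin.h>

void trunc_demo(__m256 a, __m256d d, __m256i *u, __m128i *s) {
    *u = _mm256_cvttps_epu32(a); /* e.g. 3.9f -> 3, never 4 */
    *s = _mm256_cvttpd_epi32(d); /* truncating double -> int32 */
}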
- - - - Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". -
-FOR j := 0 to 3
- i := j*64
- l := j*32
- dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l])
-ENDFOR
-dst[MAX:256] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). -
-FOR j := 0 to 3
- i := j*64
- l := j*32
- IF k[j]
- dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l])
- ELSE
- dst[i+63:i] := src[i+63:i]
- FI
-ENDFOR
-dst[MAX:256] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). -
-FOR j := 0 to 3
- i := j*64
- l := j*32
- IF k[j]
- dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l])
- ELSE
- dst[i+63:i] := 0
- FI
-ENDFOR
-dst[MAX:256] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". -
-FOR j := 0 to 1
- i := j*64
- l := j*32
- dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l])
-ENDFOR
-dst[MAX:128] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). -
-FOR j := 0 to 1
- i := j*64
- l := j*32
- IF k[j]
- dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l])
- ELSE
- dst[i+63:i] := src[i+63:i]
- FI
-ENDFOR
-dst[MAX:128] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). -
-FOR j := 0 to 1
- i := j*64
- l := j*32
- IF k[j]
- dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l])
- ELSE
- dst[i+63:i] := 0
- FI
-ENDFOR
-dst[MAX:128] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
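The unsigned widening form (_mm256_cvtepu32_pd and relatives) differs from the signed cvtepi32_pd entries earlier only in how the top bit of each lane is interpreted:

#include <immintrin.h>

__m256d u32_to_f64(__m128i a) {
    /* A lane holding 0xFFFFFFFF becomes 4294967295.0 here;
       _mm256_cvtepi32_pd would give -1.0. */
    return _mm256_cvtepu32_pd(a);
}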
- - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - k := 8*j - dst[k+7:k] := Truncate8(a[i+31:i]) -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+31:i]) - ELSE - dst[l+7:l] := src[l+7:l] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 7 - i := 32*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i]) - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+31:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 8*j - dst[k+7:k] := Truncate8(a[i+31:i]) -ENDFOR -dst[MAX:32] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+31:i]) - ELSE - dst[l+7:l] := src[l+7:l] - FI -ENDFOR -dst[MAX:32] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 3 - i := 32*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i]) - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+31:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:32] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - k := 16*j - dst[k+15:k] := Truncate16(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 16*j - IF k[j] - dst[l+15:l] := Truncate16(a[i+31:i]) - ELSE - dst[l+15:l] := src[l+15:l] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 7 - i := 32*j - l := 16*j - IF k[j] - MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i]) - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 16*j - IF k[j] - dst[l+15:l] := Truncate16(a[i+31:i]) - ELSE - dst[l+15:l] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 16*j - dst[k+15:k] := Truncate16(a[i+31:i]) -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 16*j - IF k[j] - dst[l+15:l] := Truncate16(a[i+31:i]) - ELSE - dst[l+15:l] := src[l+15:l] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 3 - i := 32*j - l := 16*j - IF k[j] - MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i]) - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 32*j - l := 16*j - IF k[j] - dst[l+15:l] := Truncate16(a[i+31:i]) - ELSE - dst[l+15:l] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
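These truncating down-conversions are the VPMOV family (_mm256_cvtepi32_epi8/_epi16 and relatives); each also has a memory form that stores only the active lanes' narrowed values. A sketch of both:

#include <immintrin.h>

void narrow_demo(__m256i a, __m128i *words, void *bytes) {
    *words = _mm256_cvtepi32_epi16(a);                          /* keep low 16 bits per lane */
    _mm256_mask_cvtepi32_storeu_epi8(bytes, (__mmask8)0xFF, a); /* 8 narrowed bytes to memory */
}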
- - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". -
-FOR j := 0 to 3
- i := 64*j
- k := 8*j
- dst[k+7:k] := Truncate8(a[i+63:i])
-ENDFOR
-dst[MAX:32] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). -
-FOR j := 0 to 3
- i := 64*j
- l := 8*j
- IF k[j]
- dst[l+7:l] := Truncate8(a[i+63:i])
- ELSE
- dst[l+7:l] := src[l+7:l]
- FI
-ENDFOR
-dst[MAX:32] := 0
- - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 3 - i := 64*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i]) - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := 64*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+63:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 1 - i := 64*j - k := 8*j - dst[k+7:k] := Truncate8(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 64*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+63:i]) - ELSE - dst[l+7:l] := src[l+7:l] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 1 - i := 64*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i]) - FI -ENDFOR - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := 64*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+63:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Convert -
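A short C sketch of the 64 -> 8 truncation, again using the standard Intel names for these operations (toolchain flags assumed as before):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* 0x1FF truncates to 0xFF; only the low byte of each lane survives. */
    __m256i a = _mm256_setr_epi64x(0x1FF, 0x02, 0x103, 0x04);

    __m128i plain  = _mm256_cvtepi64_epi8(a);             /* bytes ff 02 03 04        */
    __m128i zeroed = _mm256_maskz_cvtepi64_epi8(0x3, a);  /* lanes 2, 3 inactive -> 0 */

    uint8_t out[16];
    _mm_storeu_si128((__m128i *)out, zeroed);
    printf("%02x %02x %02x %02x\n", out[0], out[1], out[2], out[3]); /* ff 02 00 00 */
    (void)plain;
    return 0;
}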
Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation.

Forms for both the 256-bit source (j := 0 to 3, zeroing dst[MAX:128]) and the 128-bit source (j := 0 to 1, zeroing dst[MAX:64]): plain (every element converted), writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), and a store form that writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".

Operation, shown for the writemask form with a 256-bit source; the zeromask form writes 0 in place of src[l+31:l], and the store form writes MEM[base_addr+l+31:base_addr+l] for active elements only, with no trailing zeroing:

FOR j := 0 to 3
	i := 64*j
	l := 32*j
	IF k[j]
		dst[l+31:l] := Truncate32(a[i+63:i])
	ELSE
		dst[l+31:l] := src[l+31:l]
	FI
ENDFOR
dst[MAX:128] := 0

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert (store forms also: Store).
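A minimal C sketch of the 64 -> 32 truncation (standard Intel names; same toolchain assumption):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* A value above 2^32 keeps only its low 32 bits. */
    __m256i a  = _mm256_setr_epi64x(0x100000002LL, 3, 4, 5);
    __m128i lo = _mm256_cvtepi64_epi32(a);

    uint32_t out[4];
    _mm_storeu_si128((__m128i *)out, lo);
    printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]);  /* 2 3 4 5 */
    return 0;
}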
Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation.

Forms for both the 256-bit source (j := 0 to 3, zeroing dst[MAX:64]) and the 128-bit source (j := 0 to 1, zeroing dst[MAX:32]): plain (every element converted), writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), and a store form that writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".

Operation, shown for the writemask form with a 256-bit source; the zeromask form writes 0 in place of src[l+15:l], and the store form writes MEM[base_addr+l+15:base_addr+l] for active elements only, with no trailing zeroing:

FOR j := 0 to 3
	i := 64*j
	l := 16*j
	IF k[j]
		dst[l+15:l] := Truncate16(a[i+63:i])
	ELSE
		dst[l+15:l] := src[l+15:l]
	FI
ENDFOR
dst[MAX:64] := 0

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert (store forms also: Store).
Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation.

Forms for both the 256-bit source (j := 0 to 7, zeroing dst[MAX:64]) and the 128-bit source (j := 0 to 3, zeroing dst[MAX:32]): plain (every element converted), writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), and a store form that writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".

Operation, shown for the writemask form with a 256-bit source; the zeromask form writes 0 in place of src[l+7:l], and the store form writes MEM[base_addr+l+7:base_addr+l] for active elements only, with no trailing zeroing:

FOR j := 0 to 7
	i := 32*j
	l := 8*j
	IF k[j]
		dst[l+7:l] := Saturate8(a[i+31:i])
	ELSE
		dst[l+7:l] := src[l+7:l]
	FI
ENDFOR
dst[MAX:64] := 0

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert (store forms also: Store).
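A minimal C sketch of signed saturation, which clamps rather than wraps (standard Intel names; same toolchain assumption):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* Values outside [-128, 127] clamp to the nearest bound. */
    __m256i a = _mm256_setr_epi32(300, -300, 100, -100, 0, 1, 2, 3);
    __m128i s = _mm256_cvtsepi32_epi8(a);

    int8_t out[16];
    _mm_storeu_si128((__m128i *)out, s);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* 127 -128 100 -100 */
    return 0;
}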
Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation.

Forms for both the 256-bit source (j := 0 to 7, zeroing dst[MAX:128]) and the 128-bit source (j := 0 to 3, zeroing dst[MAX:64]): plain (every element converted), writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), and a store form that writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".

Operation, shown for the writemask form with a 256-bit source; the zeromask form writes 0 in place of src[l+15:l], and the store form writes MEM[base_addr+l+15:base_addr+l] for active elements only, with no trailing zeroing:

FOR j := 0 to 7
	i := 32*j
	l := 16*j
	IF k[j]
		dst[l+15:l] := Saturate16(a[i+31:i])
	ELSE
		dst[l+15:l] := src[l+15:l]
	FI
ENDFOR
dst[MAX:128] := 0

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert (store forms also: Store).
Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation.

Forms for both the 256-bit source (j := 0 to 3, zeroing dst[MAX:32]) and the 128-bit source (j := 0 to 1, zeroing dst[MAX:16]): plain (every element converted), writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), and a store form that writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".

Operation, shown for the writemask form with a 256-bit source; the zeromask form writes 0 in place of src[l+7:l], and the store form writes MEM[base_addr+l+7:base_addr+l] for active elements only, with no trailing zeroing:

FOR j := 0 to 3
	i := 64*j
	l := 8*j
	IF k[j]
		dst[l+7:l] := Saturate8(a[i+63:i])
	ELSE
		dst[l+7:l] := src[l+7:l]
	FI
ENDFOR
dst[MAX:32] := 0

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert (store forms also: Store).
Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation.

Forms for both the 256-bit source (j := 0 to 3, zeroing dst[MAX:128]) and the 128-bit source (j := 0 to 1, zeroing dst[MAX:64]): plain (every element converted), writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), and a store form that writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".

Operation, shown for the writemask form with a 256-bit source; the zeromask form writes 0 in place of src[l+31:l], and the store form writes MEM[base_addr+l+31:base_addr+l] for active elements only, with no trailing zeroing:

FOR j := 0 to 3
	i := 64*j
	l := 32*j
	IF k[j]
		dst[l+31:l] := Saturate32(a[i+63:i])
	ELSE
		dst[l+31:l] := src[l+31:l]
	FI
ENDFOR
dst[MAX:128] := 0

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert (store forms also: Store).
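A minimal C sketch of the 64 -> 32 signed saturation, clamping to INT32_MAX / INT32_MIN (standard Intel names; same toolchain assumption):

#include <immintrin.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

int main(void) {
    /* Saturation clamps to the 32-bit bounds rather than truncating. */
    __m256i a = _mm256_setr_epi64x(INT64_MAX, INT64_MIN, 7, -7);
    __m128i s = _mm256_cvtsepi64_epi32(a);

    int32_t out[4];
    _mm_storeu_si128((__m128i *)out, s);
    printf("%" PRId32 " %" PRId32 " %" PRId32 " %" PRId32 "\n",
           out[0], out[1], out[2], out[3]); /* 2147483647 -2147483648 7 -7 */
    return 0;
}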
Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation.

Forms for both the 256-bit source (j := 0 to 3, zeroing dst[MAX:64]) and the 128-bit source (j := 0 to 1, zeroing dst[MAX:32]): plain (every element converted), writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), and a store form that writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".

Operation, shown for the writemask form with a 256-bit source; the zeromask form writes 0 in place of src[l+15:l], and the store form writes MEM[base_addr+l+15:base_addr+l] for active elements only, with no trailing zeroing:

FOR j := 0 to 3
	i := 64*j
	l := 16*j
	IF k[j]
		dst[l+15:l] := Saturate16(a[i+63:i])
	ELSE
		dst[l+15:l] := src[l+15:l]
	FI
ENDFOR
dst[MAX:64] := 0

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert (store forms also: Store).
Sign extend packed integers in "a" to wider packed integers, and store the results in "dst" using either writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation for an S-bit to D-bit form with N elements, writemask shown; the zeromask form writes 0 in place of src[i+D-1:i]:

FOR j := 0 to N-1
	i := D*j
	l := S*j
	IF k[j]
		dst[i+D-1:i] := SignExtendD(a[l+S-1:l])
	ELSE
		dst[i+D-1:i] := src[i+D-1:i]
	FI
ENDFOR
dst[MAX:W] := 0

Forms in this group (S -> D, N elements, source bytes read, upper zeroing W):
	8 -> 32, N = 8, low 8 bytes of "a", W = 256
	8 -> 32, N = 4, low 4 bytes of "a", W = 128
	8 -> 64, N = 4, low 4 bytes of "a", W = 256
	8 -> 64, N = 2, low 2 bytes of "a", W = 128
	32 -> 64, N = 4, W = 256
	32 -> 64, N = 2, W = 128
	16 -> 32, N = 8, W = 256
	16 -> 32, N = 4, W = 128
	16 -> 64, N = 4, low 8 bytes of "a", W = 256
	16 -> 64, N = 2, low 4 bytes of "a", W = 128

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert.
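A minimal C sketch of a masked sign extension, here 8 -> 32 with a zeromask (standard Intel names; same toolchain assumption):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* Low 8 bytes of a: -1 sign-extends to 32-bit -1, not 255. */
    __m128i a = _mm_setr_epi8(-1, 2, -3, 4, -5, 6, -7, 8,
                              0, 0, 0, 0, 0, 0, 0, 0);
    __m256i z = _mm256_maskz_cvtepi8_epi32(0x0F, a); /* lanes 4..7 zeroed */

    int32_t out[8];
    _mm256_storeu_si256((__m256i *)out, z);
    printf("%d %d %d %d | %d\n", out[0], out[1], out[2], out[3], out[4]);
    /* -1 2 -3 4 | 0 */
    return 0;
}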
Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit or unsigned 16-bit integers with unsigned saturation.

Forms for each destination width, with both a 256-bit source (j := 0 to 7) and a 128-bit source (j := 0 to 3): plain (every element converted), writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), and a store form that writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".

Operation, shown for the 32 -> 8 writemask form with a 256-bit source; the 32 -> 16 forms use l := 16*j with SaturateU16, the zeromask form writes 0, and the store form writes MEM[base_addr+l+7:base_addr+l] for active elements only, with no trailing zeroing:

FOR j := 0 to 7
	i := 32*j
	l := 8*j
	IF k[j]
		dst[l+7:l] := SaturateU8(a[i+31:i])
	ELSE
		dst[l+7:l] := src[l+7:l]
	FI
ENDFOR
dst[MAX:64] := 0

Upper zeroing by form: 32 -> 8 zeroes dst[MAX:64] (j := 0 to 7) or dst[MAX:32] (j := 0 to 3); 32 -> 16 zeroes dst[MAX:128] (j := 0 to 7) or dst[MAX:64] (j := 0 to 3).

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert (store forms also: Store).
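A minimal C sketch of unsigned saturation (standard Intel names; same toolchain assumption):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* Unsigned saturation clamps anything above 255 down to 255. */
    __m256i a = _mm256_setr_epi32(0x1FF, 7, 1000, 255, 0, 1, 2, 3);
    __m128i u = _mm256_cvtusepi32_epi8(a);

    uint8_t out[16];
    _mm_storeu_si128((__m128i *)out, u);
    printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]); /* 255 7 255 255 */
    return 0;
}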
Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit, 32-bit, or 16-bit integers with unsigned saturation.

Forms for each destination width, with both a 256-bit source (j := 0 to 3) and a 128-bit source (j := 0 to 1): plain (every element converted), writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), and a store form that writes the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".

Operation, shown for the 64 -> 32 writemask form with a 256-bit source; the other widths use l := 8*j with SaturateU8 or l := 16*j with SaturateU16, the zeromask form writes 0, and the store form writes MEM[base_addr+l+31:base_addr+l] for active elements only, with no trailing zeroing:

FOR j := 0 to 3
	i := 64*j
	l := 32*j
	IF k[j]
		dst[l+31:l] := SaturateU32(a[i+63:i])
	ELSE
		dst[l+31:l] := src[l+31:l]
	FI
ENDFOR
dst[MAX:128] := 0

Upper zeroing by form: 64 -> 8 zeroes dst[MAX:32] (j := 0 to 3) or dst[MAX:16] (j := 0 to 1); 64 -> 32 zeroes dst[MAX:128] or dst[MAX:64]; 64 -> 16 zeroes dst[MAX:64] or dst[MAX:32].

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert (store forms also: Store).
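A minimal C sketch of the 64 -> 32 unsigned saturation (standard Intel names; same toolchain assumption):

#include <immintrin.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

int main(void) {
    /* Anything above UINT32_MAX clamps to UINT32_MAX; -1 is all-ones. */
    __m256i a = _mm256_setr_epi64x(0x100000000LL, 42, -1, 7);
    __m128i u = _mm256_cvtusepi64_epi32(a);

    uint32_t out[4];
    _mm_storeu_si128((__m128i *)out, u);
    printf("%" PRIu32 " %" PRIu32 " %" PRIu32 " %" PRIu32 "\n",
           out[0], out[1], out[2], out[3]); /* 4294967295 42 4294967295 7 */
    return 0;
}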
Zero extend packed unsigned integers in "a" to wider packed integers, and store the results in "dst" using either writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation for an S-bit to D-bit form with N elements, writemask shown; the zeromask form writes 0 in place of src[i+D-1:i]:

FOR j := 0 to N-1
	i := D*j
	l := S*j
	IF k[j]
		dst[i+D-1:i] := ZeroExtendD(a[l+S-1:l])
	ELSE
		dst[i+D-1:i] := src[i+D-1:i]
	FI
ENDFOR
dst[MAX:W] := 0

Forms in this group (S -> D, N elements, source bytes read, upper zeroing W):
	8 -> 32, N = 8, low 8 bytes of "a", W = 256
	8 -> 32, N = 4, low 4 bytes of "a", W = 128
	8 -> 64, N = 4, low 4 bytes of "a", W = 256
	8 -> 64, N = 2, low 2 bytes of "a", W = 128
	32 -> 64, N = 4, W = 256
	32 -> 64, N = 2, W = 128
	16 -> 32, N = 8, W = 256
	16 -> 32, N = 4, W = 128
	16 -> 64, N = 4, low 8 bytes of "a", W = 256
	16 -> 64, N = 2, low 4 bytes of "a", W = 128

CPUID: AVX512F, AVX512VL. Header: immintrin.h. Category: Convert.
- - - - - - Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
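The expand-load entries above read only as many elements from memory as there are set bits in "k", then place them into the active lanes in order. A short sketch, assuming the name _mm256_maskz_expandloadu_pd (inferred from the naming scheme, not given in the entries):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* Three packed values in memory; expand them into lanes 0, 2 and 3. */
    double packed[3] = {1.0, 2.0, 3.0};
    __mmask8 k = 0x0D;                      /* 0b1101: lanes 0, 2, 3 active */

    __m256d v = _mm256_maskz_expandloadu_pd(k, packed);

    double out[4];
    _mm256_storeu_pd(out, v);
    /* Expected: 1 0 2 3 -- exactly three doubles are read from memory. */
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
    return 0;
}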
- - - - - - Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
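A sketch of the masked gathers above. The name _mm256_mmask_i32gather_pd is an assumption based on Intel's mmask_i32gather naming; "scale" must be a compile-time constant, and lanes whose mask bit is clear are not accessed at all, they simply keep "src".

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    double table[8] = {10, 11, 12, 13, 14, 15, 16, 17};
    __m128i idx = _mm_setr_epi32(6, 4, 2, 0);      /* four 32-bit indices */
    __m256d src = _mm256_set1_pd(-1.0);            /* kept where k[j] == 0 */
    __mmask8 k = 0x0B;                             /* 0b1011: lane 2 masked */

    /* scale = 8 because the indices count 8-byte doubles */
    __m256d v = _mm256_mmask_i32gather_pd(src, k, idx, table, 8);

    double out[4];
    _mm256_storeu_pd(out, v);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 16 14 -1 10 */
    return 0;
}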
- - - - - - - - Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
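Note the "dst[MAX:64] := 0" tail in the entry above: two 64-bit indices can only produce two 32-bit floats, so everything above bit 64 of the 128-bit result is zeroed. A sketch assuming the name _mm_mmask_i64gather_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    float table[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    __m128i idx = _mm_set_epi64x(5, 3);        /* element 0 = 3, element 1 = 5 */
    __m128  src = _mm_set1_ps(-1.0f);
    __mmask8 k = 0x3;                          /* both elements active */

    /* Two 64-bit indices yield two floats; lanes 2 and 3 come back zeroed. */
    __m128 v = _mm_mmask_i64gather_ps(src, k, idx, table, 4);

    float out[4];
    _mm_storeu_ps(out, v);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 3 5 0 0 */
    return 0;
}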
- - - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
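The aligned masked loads above require 32-byte alignment for the 256-bit forms and 16-byte alignment for the 128-bit forms. A sketch assuming the names _mm256_mask_load_pd and _mm256_maskz_load_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _Alignas guarantees the 32-byte alignment the entry demands. */
    _Alignas(32) double buf[4] = {1.0, 2.0, 3.0, 4.0};
    __m256d src = _mm256_set1_pd(9.0);

    __m256d lo = _mm256_mask_load_pd(src, 0x3, buf);  /* lanes 0-1 loaded */
    __m256d hi = _mm256_maskz_load_pd(0xC, buf);      /* lanes 2-3 loaded */

    double a[4], b[4];
    _mm256_storeu_pd(a, lo);
    _mm256_storeu_pd(b, hi);
    printf("%g %g %g %g / %g %g %g %g\n",
           a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
    /* Expected: 1 2 9 9 / 0 0 3 4 */
    return 0;
}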
- - - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
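A sketch of the unaligned masked integer loads above, assuming the name _mm256_maskz_loadu_epi32. Masked-off elements are not read at all, which is what makes these loads usable at the ragged tail of a buffer:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    int data[8] = {0, 1, 2, 3, 4, 5, 6, 7};    /* no alignment required */
    __mmask8 k = 0xF0;                          /* upper four lanes only */

    __m256i v = _mm256_maskz_loadu_epi32(k, data);

    int out[8];
    _mm256_storeu_si256((__m256i *)out, v);
    for (int j = 0; j < 8; j++)
        printf("%d ", out[j]);                  /* 0 0 0 0 4 5 6 7 */
    printf("\n");
    return 0;
}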
- - - - - - Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
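The integer expand loads above pair naturally with a per-step popcount of the mask to walk a compacted stream: each step consumes exactly popcount(k) elements. A sketch assuming the name _mm256_maskz_expandloadu_epi32 and the GCC/Clang builtin __builtin_popcount:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* A compacted stream: only the "present" lanes were stored. Each step
       re-inflates one 8-lane vector and consumes popcount(k) ints. */
    int stream[6] = {1, 2, 3, 4, 5, 6};
    __mmask8 masks[2] = {0x2A, 0x07};           /* lanes 1,3,5 then 0,1,2 */
    const int *p = stream;

    for (int step = 0; step < 2; step++) {
        __m256i v = _mm256_maskz_expandloadu_epi32(masks[step], p);
        p += __builtin_popcount(masks[step]);   /* advance past consumed data */

        int out[8];
        _mm256_storeu_si256((__m256i *)out, v);
        for (int j = 0; j < 8; j++) printf("%d ", out[j]);
        printf("\n");                           /* 0 1 0 2 0 3 0 0, then 4 5 6 0 ... */
    }
    return 0;
}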
- - - - - - - - Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 3 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - - - - - Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 1 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Load -
- - - - Load 256 bits (composed of 4 packed 64-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - Load 256 bits (composed of 8 packed 32-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - Load 128 bits (composed of 2 packed 64-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[127:0] := MEM[mem_addr+127:mem_addr] -dst[MAX:128] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - Load 128 bits (composed of 4 packed 32-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[127:0] := MEM[mem_addr+127:mem_addr] -dst[MAX:128] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - Load 256 bits (composed of 4 packed 64-bit integers) from memory into "dst". - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - Load 256 bits (composed of 8 packed 32-bit integers) from memory into "dst". - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - Load 128 bits (composed of 2 packed 64-bit integers) from memory into "dst". - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -dst[127:0] := MEM[mem_addr+127:mem_addr] -dst[MAX:128] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
- - - - Load 128 bits (composed of 4 packed 32-bit integers) from memory into "dst". - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -dst[127:0] := MEM[mem_addr+127:mem_addr] -dst[MAX:128] := 0 - - - AVX512F - AVX512VL - 
immintrin.h
- Load -
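The entries above are the plain (unmasked) integer loads; they differ only in width and in the alignment contract. A sketch assuming the names _mm256_loadu_epi64 and _mm256_load_epi64 (newer compilers expose these alongside the older _mm256_loadu_si256/_mm256_load_si256 forms):

#include <immintrin.h>

int main(void) {
    long long any[4] = {1, 2, 3, 4};                 /* any alignment is fine */
    _Alignas(32) long long aligned[4] = {5, 6, 7, 8};

    __m256i a = _mm256_loadu_epi64(any);             /* unaligned form */
    __m256i b = _mm256_load_epi64(aligned);          /* 32-byte-aligned form */

    (void)a; (void)b;                                /* ... use a and b ... */
    return 0;
}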
- - - - - - Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
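The mask_mov entries above amount to a per-lane select driven by a mask register: lanes come from "a" where the mask bit is set and from "src" otherwise. A sketch assuming the name _mm256_mask_mov_pd; in real code "k" would typically come from a compare:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d src = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
    __m256d a   = _mm256_set1_pd(-1.0);

    __mmask8 k = 0x6;                                /* 0b0110 */
    __m256d v = _mm256_mask_mov_pd(src, k, a);       /* per lane: k ? a : src */

    double out[4];
    _mm256_storeu_pd(out, v);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 -1 -1 3 */
    return 0;
}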
- - - - - - Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[63:0] := a[63:0] -tmp[127:64] := a[63:0] -tmp[191:128] := a[191:128] -tmp[255:192] := a[191:128] -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[63:0] := a[63:0] -tmp[127:64] := a[63:0] -tmp[191:128] := a[191:128] -tmp[255:192] := a[191:128] -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[63:0] := a[63:0] -tmp[127:64] := a[63:0] -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[63:0] := a[63:0] -tmp[127:64] := a[63:0] -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
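Per the tmp[] assignments above, movedup broadcasts each even-indexed double into the odd lane beside it before the mask is applied. A sketch assuming the name _mm256_maskz_movedup_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);

    /* Unmasked duplication would give 1 1 3 3; the zeromask then
       keeps only the lower three lanes. */
    __m256d v = _mm256_maskz_movedup_pd(0x7, a);

    double out[4];
    _mm256_storeu_pd(out, v);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 1 3 0 */
    return 0;
}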
- - - - - - Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[31:0] := a[63:32] -tmp[63:32] := a[63:32] -tmp[95:64] := a[127:96] -tmp[127:96] := a[127:96] -tmp[159:128] := a[191:160] -tmp[191:160] := a[191:160] -tmp[223:192] := a[255:224] -tmp[255:224] := a[255:224] -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[31:0] := a[63:32] -tmp[63:32] := a[63:32] -tmp[95:64] := a[127:96] -tmp[127:96] := a[127:96] -tmp[159:128] := a[191:160] -tmp[191:160] := a[191:160] -tmp[223:192] := a[255:224] -tmp[255:224] := a[255:224] -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[31:0] := a[63:32] -tmp[63:32] := a[63:32] -tmp[95:64] := a[127:96] -tmp[127:96] := a[127:96] -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[31:0] := a[63:32] -tmp[63:32] := a[63:32] -tmp[95:64] := a[127:96] -tmp[127:96] := a[127:96] -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[31:0] := a[31:0] -tmp[63:32] := a[31:0] -tmp[95:64] := a[95:64] -tmp[127:96] := a[95:64] -tmp[159:128] := a[159:128] -tmp[191:160] := a[159:128] -tmp[223:192] := a[223:192] -tmp[255:224] := a[223:192] -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[31:0] := a[31:0] -tmp[63:32] := a[31:0] -tmp[95:64] := a[95:64] -tmp[127:96] := a[95:64] -tmp[159:128] := a[159:128] -tmp[191:160] := a[159:128] -tmp[223:192] := a[223:192] -tmp[255:224] := a[223:192] -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - - Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[31:0] := a[31:0] -tmp[63:32] := a[31:0] -tmp[95:64] := a[95:64] -tmp[127:96] := a[95:64] -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
- - - - - Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[31:0] := a[31:0] -tmp[63:32] := a[31:0] -tmp[95:64] := a[95:64] -tmp[127:96] := a[95:64] -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Move -
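movehdup duplicates the odd-indexed floats downward and moveldup duplicates the even-indexed floats upward, per the tmp[] assignments above. A sketch assuming the names _mm256_maskz_movehdup_ps and _mm256_maskz_moveldup_ps, with an all-ones mask so the unmasked behaviour is visible:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);

    __m256 odd  = _mm256_maskz_movehdup_ps(0xFF, a); /* 1 1 3 3 5 5 7 7 */
    __m256 even = _mm256_maskz_moveldup_ps(0xFF, a); /* 0 0 2 2 4 4 6 6 */

    float o[8], e[8];
    _mm256_storeu_ps(o, odd);
    _mm256_storeu_ps(e, even);
    for (int j = 0; j < 8; j++) printf("(%g,%g) ", o[j], e[j]);
    printf("\n");
    return 0;
}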
- - - - - - - Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] AND b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] AND b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] AND b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] AND b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
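The masked logical entries above and below (AND, ANDNOT, OR, for 32- and 64-bit elements) all share the same shape: compute the bitwise operation, then merge or zero per lane. One sketch stands in for the whole family, assuming the name _mm256_mask_and_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a   = _mm256_set1_epi32(0x0F0F);
    __m256i b   = _mm256_set1_epi32(0x00FF);
    __m256i src = _mm256_set1_epi32(-1);

    /* Lanes 0-3: a AND b = 0x000F; lanes 4-7: copied from src. */
    __m256i v = _mm256_mask_and_epi32(src, 0x0F, a, b);

    int out[8];
    _mm256_storeu_si256((__m256i *)out, v);
    for (int j = 0; j < 8; j++) printf("%08x ", (unsigned)out[j]);
    printf("\n");
    return 0;
}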
- - - - - - - Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] AND b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] AND b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] AND b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] AND b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
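The masked OR entries follow the same writemask/zeromask pattern at 256-bit and 128-bit widths. A short sketch of the 256-bit dword merge-masking form, assuming it maps to `_mm256_mask_or_epi32` (hypothetical mapping, since the name is stripped here):

```c
#include <immintrin.h>

// Assumed name: _mm256_mask_or_epi32 (merge-masking form described above).
__m256i or_even_lanes(__m256i src, __m256i a, __m256i b) {
    // 0x55 = 0b01010101: even lanes take a | b, odd lanes keep src.
    return _mm256_mask_or_epi32(src, 0x55, a, b);
}
```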
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "a" when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 7 - i := j*32 - IF k[j] - FOR h := 0 to 31 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 7 - i := j*32 - IF k[j] - FOR h := 0 to 31 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 7 - i := j*32 - FOR h := 0 to 31 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "a" when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 3 - i := j*32 - IF k[j] - FOR h := 0 to 31 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 3 - i := j*32 - IF k[j] - FOR h := 0 to 31 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 3 - i := j*32 - FOR h := 0 to 31 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "a" when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 3 - i := j*64 - IF k[j] - FOR h := 0 to 63 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 3 - i := j*64 - IF k[j] - FOR h := 0 to 63 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 3 - i := j*64 - FOR h := 0 to 63 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "a" when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 1 - i := j*64 - IF k[j] - FOR h := 0 to 63 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 1 - i := j*64 - IF k[j] - FOR h := 0 to 63 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 1 - i := j*64 - FOR h := 0 to 63 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
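The `TernaryOP` pseudocode above encodes a full 3-input truth table in `imm8`: for each bit position, the triple of bits from "a", "b", and "c" forms the index `a*4 + b*2 + c`, and `imm8[index]` is the result bit. For example, `imm8 = 0xCA` encodes `(a AND b) OR (NOT a AND c)`, a bitwise select. A sketch assuming the unmasked 256-bit dword entry maps to `_mm256_ternarylogic_epi32`:

```c
#include <immintrin.h>

// Assumed name: _mm256_ternarylogic_epi32. imm8 = 0xCA = 0b11001010:
// indexing the table with (a,b,c) gives b where a is 1 and c where a is 0,
// i.e. a single-instruction bitwise select.
__m256i bitwise_select(__m256i mask, __m256i on, __m256i off) {
    return _mm256_ternarylogic_epi32(mask, on, off, 0xCA);
}
```

The masked forms differ only in that unselected 32-bit (or 64-bit) elements are copied from "a" or zeroed, exactly as in the AND/OR entries above.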
- - - - - - - Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
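The masked XOR entries mirror the AND/OR pattern. A sketch of the 128-bit qword zero-masking form, assuming it maps to `_mm_maskz_xor_epi64` (name inferred, not shown in the entry):

```c
#include <immintrin.h>

// Assumed name: _mm_maskz_xor_epi64 (zero-masking form described above).
__m128i xor_low_lane(__m128i a, __m128i b) {
    // k = 0b01: lane 0 gets a ^ b, lane 1 becomes 0.
    return _mm_maskz_xor_epi64(0x1, a, b);
}
```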
- - - - - Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := a[i+63:i] OR b[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[i+31:i] OR b[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := a[i+63:i] OR b[i+63:i] -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := a[i+31:i] OR b[i+31:i] -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Logical -
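The unmasked XOR/OR entries above compute the same result as the AVX2 `_mm256_xor_si256` / `_mm256_or_si256`; the element-typed variants exist so the EVEX encodings can pair with per-element masking. Assuming a recent immintrin.h that provides the typed names (this is an assumption about header support, not stated in the entries):

```c
#include <immintrin.h>

// Assumed names: _mm256_xor_epi64 / _mm256_or_epi32, the unmasked
// EVEX forms described above. Functionally identical to the AVX2
// _mm256_xor_si256 / _mm256_or_si256 on the full 256-bit vector.
__m256i xor64(__m256i a, __m256i b) { return _mm256_xor_epi64(a, b); }
__m256i or32(__m256i a, __m256i b)  { return _mm256_or_epi32(a, b); }
```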
- - - - - - Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Set -
- - - - - Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Set -
- - - - - - Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Set -
- - - - - Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Set -
- - - - - - Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Set -
- - - - - Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Set -
- - - - - - Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Set -
- - - - - Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Set -
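The masked broadcast entries splat one scalar into only the lanes selected by "k". A sketch assuming the usual names `_mm256_mask_set1_epi32` and `_mm_maskz_set1_epi64` (inferred mapping):

```c
#include <immintrin.h>

// Assumed names: _mm256_mask_set1_epi32 / _mm_maskz_set1_epi64
// (masked broadcast forms described above).
__m256i splat_some(__m256i src, __mmask8 k, int v) {
    // Lanes selected by k become v; the rest keep src.
    return _mm256_mask_set1_epi32(src, k, v);
}

__m128i splat_low(long long v) {
    // k = 0b01: lane 0 becomes v, lane 1 is zeroed.
    return _mm_maskz_set1_epi64(0x1, v);
}
```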
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
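Per the `LEFT_ROTATE_*` pseudocode above, the immediate rotate count is reduced modulo the element width, so no count is out of range. A sketch assuming the unmasked 256-bit dword entry maps to `_mm256_rol_epi32` (the immediate must be a compile-time constant):

```c
#include <immintrin.h>

// Assumed name: _mm256_rol_epi32. The count is taken modulo 32,
// matching LEFT_ROTATE_DWORDS above.
__m256i rotl8(__m256i a) {
    return _mm256_rol_epi32(a, 8);
}
```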
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
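The variable-count rotates read a separate count per element from "b", again reduced modulo the element width. A sketch assuming the unmasked 256-bit dword form maps to `_mm256_rolv_epi32`:

```c
#include <immintrin.h>

// Assumed name: _mm256_rolv_epi32. Each 32-bit lane of `counts` supplies
// that lane's rotate amount, reduced modulo 32 as in the pseudocode above.
__m256i rotl_var(__m256i a, __m256i counts) {
    return _mm256_rolv_epi32(a, counts);
}
```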
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
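A sketch of the immediate right-rotate, assuming the unmasked 128-bit qword entry maps to `_mm_ror_epi64`:

```c
#include <immintrin.h>

// Assumed name: _mm_ror_epi64. Rotating right by 1 moves each qword's
// low bit into its sign bit; the count is reduced modulo 64.
__m128i rotr1(__m128i a) {
    return _mm_ror_epi64(a, 1);
}
```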
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE RIGHT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src >> count) OR (src << (32 - count)) -} -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
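And the variable-count counterpart, assuming the unmasked 256-bit qword entry maps to `_mm256_rorv_epi64`:

```c
#include <immintrin.h>

// Assumed name: _mm256_rorv_epi64. Each 64-bit lane of `counts` supplies
// that lane's right-rotate amount, reduced modulo 64.
__m256i rotr_var(__m256i a, __m256i counts) {
    return _mm256_rorv_epi64(a, counts);
}
```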
- - - - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
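Note the contrast with the rotates: in the shift pseudocode above, a count greater than the element width minus one produces 0 rather than being reduced modulo the width. A sketch of the merge-masked immediate form, assuming it maps to `_mm256_mask_slli_epi32`:

```c
#include <immintrin.h>

// Assumed name: _mm256_mask_slli_epi32. A count above 31 would yield 0
// (no modulo reduction, unlike the rotate intrinsics); lanes whose mask
// bit is clear keep src.
__m256i shl_merge(__m256i src, __mmask8 k, __m256i a) {
    return _mm256_mask_slli_epi32(src, k, a, 4);
}
```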
- - - - - - - Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
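The preceding eight entries describe the per-element ("variable") left shifts, where each lane takes its own count and any count of 32/64 or more produces zero. A short illustration, assuming these correspond to the _mm256_mask_sllv_epi32-style intrinsics:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a   = _mm256_set1_epi32(1);
    __m256i cnt = _mm256_setr_epi32(0, 1, 2, 3, 31, 32, 40, 5);
    __m256i src = _mm256_set1_epi32(-1);
    __mmask8 k  = 0x7F;                  /* lane 7 inactive */

    /* Counts >= 32 zero the lane even when it is active; the
       inactive lane 7 is copied from src. */
    __m256i r = _mm256_mask_sllv_epi32(src, k, a, cnt);

    int v[8];
    _mm256_storeu_si256((__m256i *)v, r);
    for (int i = 0; i < 8; i++) printf("%d ", v[i]);
    printf("\n");  /* 1 2 4 8 -2147483648 0 0 -1 */
    return 0;
}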
- - - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
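These arithmetic right shifts replicate the sign bit into the vacated positions, so for counts of 32 and above every active lane collapses to all zeros or all ones depending on its sign. A sketch, assuming the write-masked immediate form maps to _mm256_mask_srai_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a   = _mm256_set1_epi32(-64);
    __m256i src = _mm256_set1_epi32(7);
    __mmask8 k  = 0x0F;                  /* low four lanes active */

    /* -64 >> 2 with sign fill is -16; masked-off lanes keep src. */
    __m256i r = _mm256_mask_srai_epi32(src, k, a, 2);

    int v[8];
    _mm256_storeu_si256((__m256i *)v, r);
    for (int i = 0; i < 8; i++) printf("%d ", v[i]);
    printf("\n");  /* -16 -16 -16 -16 7 7 7 7 */
    return 0;
}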
- - - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF count[63:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF imm8[7:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF count[63:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF imm8[7:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
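Worth flagging: the unmasked 256- and 128-bit entries here are new with AVX512F+AVX512VL, since SSE2/AVX2 never provided an arithmetic right shift of 64-bit lanes. A sketch, assuming they are the _mm256_srai_epi64 / _mm_srai_epi64 family:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set_epi64x(-8, 8);   /* lane 0 = 8, lane 1 = -8 */

    /* Sign bits shift in: 8 >> 1 = 4, -8 >> 1 = -4. A count above
       63 would flood every bit with the sign, per the pseudocode. */
    __m128i r = _mm_srai_epi64(a, 1);

    long long v[2];
    _mm_storeu_si128((__m128i *)v, r);
    printf("%lld %lld\n", v[0], v[1]);   /* 4 -4 */
    return 0;
}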
- - - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF count[i+63:i] < 64 - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF count[i+63:i] < 64 - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
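The variable-count arithmetic shifts behave the same way lane by lane; a count of 64 or more yields all sign bits rather than zero. A sketch, assuming the 128-bit unmasked entry is _mm_srav_epi64:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a   = _mm_set1_epi64x(-256);
    __m128i cnt = _mm_set_epi64x(100, 4); /* lane 0 = 4, lane 1 = 100 */

    /* lane 0: -256 >> 4 = -16; lane 1: count >= 64, so the result
       is all sign bits, i.e. -1. */
    __m128i r = _mm_srav_epi64(a, cnt);

    long long v[2];
    _mm_storeu_si128((__m128i *)v, r);
    printf("%lld %lld\n", v[0], v[1]);    /* -16 -1 */
    return 0;
}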
- - - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
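The logical right shifts shift in zeros regardless of sign, which is the observable difference from the SRA entries above. A sketch, assuming the zero-masked immediate form is _mm256_maskz_srli_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a  = _mm256_set1_epi32(-1);  /* 0xFFFFFFFF per lane */
    __mmask8 k = 0x55;                   /* even lanes active */

    /* Zeros shift in: 0xFFFFFFFF >> 28 = 0xF; odd (inactive)
       lanes are zeroed by the zeromask. */
    __m256i r = _mm256_maskz_srli_epi32(k, a, 28);

    unsigned v[8];
    _mm256_storeu_si256((__m256i *)v, r);
    for (int i = 0; i < 8; i++) printf("%u ", v[i]);
    printf("\n");  /* 15 0 15 0 15 0 15 0 */
    return 0;
}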
- - - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Shift -
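Per-element logical shifts again treat out-of-range counts as producing zero. A sketch, assuming the 128-bit zero-masked entry is _mm_maskz_srlv_epi64:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a   = _mm_set1_epi64x(256);
    __m128i cnt = _mm_set_epi64x(4, 70); /* lane 0 = 70, lane 1 = 4 */
    __mmask8 k  = 0x3;                   /* both lanes active */

    /* lane 0: count >= 64 gives 0; lane 1: 256 >> 4 = 16. */
    __m128i r = _mm_maskz_srlv_epi64(k, a, cnt);

    long long v[2];
    _mm_storeu_si128((__m128i *)v, r);
    printf("%lld %lld\n", v[0], v[1]);   /* 0 16 */
    return 0;
}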
- - - - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := SQRT(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := SQRT(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := SQRT(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := SQRT(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := SQRT(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := SQRT(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := SQRT(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := SQRT(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F - AVX512VL -
immintrin.h
- Elementary Math Functions -
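The masked square roots make it cheap to take roots of only selected lanes while leaving the rest untouched (writemask) or zeroed (zeromask). A sketch, assuming the 256-bit write-masked entry is _mm256_mask_sqrt_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256d a   = _mm256_set1_pd(9.0);
    __m256d src = _mm256_set1_pd(-1.0);
    __mmask8 k  = 0x5;                   /* lanes 0 and 2 active */

    /* sqrt(9) = 3 in the active lanes; src elsewhere. */
    __m256d r = _mm256_mask_sqrt_pd(src, k, a);

    double v[4];
    _mm256_storeu_pd(v, r);
    for (int i = 0; i < 4; i++) printf("%g ", v[i]);
    printf("\n");  /* 3 -1 3 -1 */
    return 0;
}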
- - - - - - - Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". - FOR j := 0 to 3 - i := j*128 - a[i+127:i] := ShiftRows(a[i+127:i]) - a[i+127:i] := SubBytes(a[i+127:i]) - dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F - VAES -
immintrin.h
- Cryptography -
- - - - - Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". - FOR j := 0 to 3 - i := j*128 - a[i+127:i] := ShiftRows(a[i+127:i]) - a[i+127:i] := SubBytes(a[i+127:i]) - a[i+127:i] := MixColumns(a[i+127:i]) - dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F - VAES -
immintrin.h
- Cryptography -
- - - - - Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". - FOR j := 0 to 3 - i := j*128 - a[i+127:i] := InvShiftRows(a[i+127:i]) - a[i+127:i] := InvSubBytes(a[i+127:i]) - dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F - VAES -
immintrin.h
- Cryptography -
- - - - - Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". - FOR j := 0 to 3 - i := j*128 - a[i+127:i] := InvShiftRows(a[i+127:i]) - a[i+127:i] := InvSubBytes(a[i+127:i]) - a[i+127:i] := InvMixColumns(a[i+127:i]) - dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F - VAES -
immintrin.h
- Cryptography -
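These four VAES entries run four independent AES rounds at once, one per 128-bit lane of a 512-bit register, which is the building block for wide parallel AES modes such as CTR. A sketch, assuming the full-encryption-round entry is _mm512_aesenc_epi128 (build with e.g. gcc -O2 -mvaes -mavx512f):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* Four independent 128-bit AES states and round keys. */
    __m512i state = _mm512_set1_epi32(0x01234567);
    __m512i rkey  = _mm512_set1_epi32(0x89abcdef);

    /* Each lane gets one full round: ShiftRows, SubBytes,
       MixColumns, then XOR with its round key. */
    __m512i next = _mm512_aesenc_epi128(state, rkey);

    unsigned v[16];
    _mm512_storeu_si512(v, next);
    printf("lane0 word0: %08x\n", v[0]);
    return 0;
}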
- - - - - - - - Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[63:0] := a[i+31:i] * b[i+31:i] - dst[i+31:i] := tmp[31:0] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
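Note that the description forms the full 64-bit product but keeps only its low 32 bits, so the result is simply 32-bit modular multiplication. A sketch, assuming the entry is _mm512_maskz_mullo_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(0x10001);
    __m512i b = _mm512_set1_epi32(0x10001);
    __mmask16 k = 0x0001;                /* only lane 0 active */

    /* The full product 0x100020001 is truncated to 0x00020001;
       every other lane is zeroed by the zeromask. */
    __m512i r = _mm512_maskz_mullo_epi32(k, a, b);

    int v[16];
    _mm512_storeu_si512(v, r);
    printf("%#x %d\n", v[0], v[1]);      /* 0x20001 0 */
    return 0;
}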
- - - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] + b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] + b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] + b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] + b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
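The [round_note] variants take an explicit rounding-control immediate instead of using the MXCSR rounding mode; the immediate must be a compile-time constant. A sketch, assuming the zero-masked rounded add is _mm512_maskz_add_round_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 a = _mm512_set1_ps(1.0f);
    __m512 b = _mm512_set1_ps(2.0f);
    __mmask16 k = 0x00FF;                /* low half active */

    /* Round-to-nearest with exceptions suppressed; masked-off
       lanes are zeroed. */
    __m512 r = _mm512_maskz_add_round_ps(
        k, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);

    float v[16];
    _mm512_storeu_ps(v, r);
    printf("%g %g\n", v[0], v[15]);      /* 3 0 */
    return 0;
}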
- - - - - - Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - [round_note] - -dst[63:0] := a[63:0] + b[63:0] -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - [round_note] - -IF k[0] - dst[63:0] := a[63:0] + b[63:0] -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := a[63:0] + b[63:0] -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - [round_note] - -IF k[0] - dst[63:0] := a[63:0] + b[63:0] -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := a[63:0] + b[63:0] -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := a[31:0] + b[31:0] -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst[31:0] := a[31:0] + b[31:0] -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := a[31:0] + b[31:0] -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst[31:0] := a[31:0] + b[31:0] -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := a[31:0] + b[31:0] -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
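For the scalar forms only mask bit 0 matters, and the upper part of the destination always comes straight from "a". A sketch, assuming the unrounded write-masked entry is _mm_mask_add_sd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a   = _mm_set_pd(99.0, 1.5); /* upper = 99, lower = 1.5 */
    __m128d b   = _mm_set_pd(-1.0, 2.5);
    __m128d src = _mm_set_pd(0.0, 7.0);

    /* Mask bit 0 clear: the lower lane is taken from src instead
       of computing 1.5 + 2.5; the upper lane is copied from a. */
    __m128d r = _mm_mask_add_sd(src, 0, a, b);

    double v[2];
    _mm_storeu_pd(v, r);
    printf("%g %g\n", v[0], v[1]);       /* 7 99 */
    return 0;
}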
- - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 7 - i := 64*j - dst[i+63:i] := a[i+63:i] / b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := 64*j - dst[i+63:i] := a[i+63:i] / b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - IF k[j] - dst[i+63:i] := a[i+63:i] / b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := 64*j - IF k[j] - dst[i+63:i] := a[i+63:i] / b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - IF k[j] - dst[i+63:i] := a[i+63:i] / b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := 64*j - IF k[j] - dst[i+63:i] := a[i+63:i] / b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := a[i+31:i] / b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := a[i+31:i] / b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := a[i+31:i] / b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := a[i+31:i] / b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := a[i+31:i] / b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := a[i+31:i] / b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
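Division follows the same masking pattern as the adds above. A sketch, assuming the zero-masked packed entry is _mm512_maskz_div_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(10.0);
    __m512d b = _mm512_set1_pd(4.0);
    __mmask8 k = 0x0F;                   /* low four lanes active */

    __m512d r = _mm512_maskz_div_pd(k, a, b);

    double v[8];
    _mm512_storeu_pd(v, r);
    printf("%g %g\n", v[0], v[7]);       /* 2.5 0 */
    return 0;
}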
- - - - - - Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - [round_note] - -dst[63:0] := a[63:0] / b[63:0] -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - [round_note] - -IF k[0] - dst[63:0] := a[63:0] / b[63:0] -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := a[63:0] / b[63:0] -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - [round_note] - -IF k[0] - dst[63:0] := a[63:0] / b[63:0] -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := a[63:0] / b[63:0] -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := a[31:0] / b[31:0] -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst[31:0] := a[31:0] / b[31:0] -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := a[31:0] / b[31:0] -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst[31:0] := a[31:0] / b[31:0] -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := a[31:0] / b[31:0] -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
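A useful property of the masked scalar forms, visible in the pseudocode: when mask bit 0 is clear the division is not performed at all, so a divisor that would otherwise signal divide-by-zero is never touched. A sketch, assuming _mm_maskz_div_ss:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set_ps(8.0f, 7.0f, 6.0f, 1.0f);
    __m128 b = _mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f); /* lower divisor 0 */

    /* Mask bit 0 clear: no division happens; the lower lane is
       zeroed, the upper three lanes are copied from a. */
    __m128 r = _mm_maskz_div_ss(0, a, b);

    float v[4];
    _mm_storeu_ps(v, r);
    printf("%g %g %g %g\n", v[0], v[1], v[2], v[3]); /* 0 6 7 8 */
    return 0;
}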
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
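The fused multiply-adds compute a*b + c with a single rounding. A sketch, assuming the zero-masked single-precision entry is _mm512_maskz_fmadd_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 a = _mm512_set1_ps(2.0f);
    __m512 b = _mm512_set1_ps(3.0f);
    __m512 c = _mm512_set1_ps(1.0f);
    __mmask16 k = 0x0001;                /* only lane 0 active */

    /* 2*3 + 1 = 7, rounded once; other lanes zeroed. */
    __m512 r = _mm512_maskz_fmadd_ps(k, a, b, c);

    float v[16];
    _mm512_storeu_ps(v, r);
    printf("%g %g\n", v[0], v[1]);       /* 7 0 */
    return 0;
}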
- - - - - - - Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - [round_note] - -dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". - [round_note] - -IF k[0] - dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] -ELSE - dst[63:0] := c[63:0] -FI -dst[127:64] := c[127:64] -dst[MAX:128] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := c[63:0]
FI
dst[127:64] := c[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := a[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := a[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := c[31:0]
FI
dst[127:32] := c[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := c[31:0]
FI
dst[127:32] := c[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := a[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := a[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
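A minimal C usage sketch of the merge-masked scalar FMA form described above. The entries here do not name their intrinsics, so the name and signature below are assumptions based on the documented semantics:

#include <immintrin.h>

/* If bit 0 of k is set, compute the low double as a*b + c; otherwise keep
   the low double of "a". The upper lane is copied from "a" either way. */
__m128d fmadd_low_merge(__m128d a, __m128d b, __m128d c, __mmask8 k)
{
    return _mm_mask_fmadd_sd(a, k, b, c); /* assumed intrinsic name */
}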
Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".
Operation:
FOR j := 0 to 7
    i := j*64
    IF ((j & 1) == 0)
        dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
    ELSE
        dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF ((j & 1) == 0)
        dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
    ELSE
        dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := c[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := c[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := a[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := a[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
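A short sketch of the alternating add/subtract form above (even lanes compute a*b - c, odd lanes a*b + c), the usual building block for interleaved complex arithmetic. The intrinsic name is an assumption, since the entry does not state it:

#include <immintrin.h>

__m512d alt_fma_pd(__m512d a, __m512d b, __m512d c)
{
    return _mm512_fmaddsub_pd(a, b, c); /* assumed name for this form */
}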
Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".
Operation:
FOR j := 0 to 15
    i := j*32
    IF ((j & 1) == 0)
        dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
    ELSE
        dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF ((j & 1) == 0)
        dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
    ELSE
        dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := c[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := c[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := a[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := a[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
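The same pattern with merge masking for the single-precision form, again with an assumed intrinsic name; lanes whose mask bit is clear keep the value from "a":

#include <immintrin.h>

__m512 alt_fma_ps_masked(__m512 a, __mmask16 k, __m512 b, __m512 c)
{
    return _mm512_mask_fmaddsub_ps(a, k, b, c); /* assumed name */
}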
Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
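A sketch of the zero-masked fused multiply-subtract above; masked-off lanes become 0.0 rather than keeping an input. The name is assumed from the described semantics:

#include <immintrin.h>

__m512d fmsub_zeroed(__mmask8 k, __m512d a, __m512d b, __m512d c)
{
    return _mm512_maskz_fmsub_pd(k, a, b, c); /* assumed name */
}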
Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := c[63:0]
FI
dst[127:64] := c[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := c[63:0]
FI
dst[127:64] := c[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := a[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := a[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := c[31:0]
FI
dst[127:32] := c[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := c[31:0]
FI
dst[127:32] := c[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := a[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := a[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
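Where an entry carries [round_note], the intrinsic takes an extra rounding operand that overrides MXCSR for that one operation. A sketch under the assumption that the scalar form is exposed as below, using the standard rounding-control macros:

#include <immintrin.h>

/* Low-lane a*b - c, rounded to nearest with exceptions suppressed. */
__m128d fmsub_low_rne(__m128d a, __m128d b, __m128d c)
{
    return _mm_fmsub_round_sd(a, b, c,
                              _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}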
Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".
Operation:
FOR j := 0 to 7
    i := j*64
    IF ((j & 1) == 0)
        dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
    ELSE
        dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF ((j & 1) == 0)
        dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
    ELSE
        dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := c[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := c[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := a[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := a[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
        ELSE
            dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
        FI
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
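The mirror image of the earlier alternating form: even lanes add, odd lanes subtract, per the pseudocode above. The name is an assumption:

#include <immintrin.h>

__m512d alt_fms_pd(__m512d a, __m512d b, __m512d c)
{
    return _mm512_fmsubadd_pd(a, b, c); /* assumed name */
}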
Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".
Operation:
FOR j := 0 to 15
    i := j*32
    IF ((j & 1) == 0)
        dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
    ELSE
        dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF ((j & 1) == 0)
        dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
    ELSE
        dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := c[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := c[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := a[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := a[i+31:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        IF ((j & 1) == 0)
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
        ELSE
            dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
        FI
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
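Zero-masked single-precision variant of the same pattern, with an assumed name:

#include <immintrin.h>

__m512 alt_fms_ps_zeroed(__mmask16 k, __m512 a, __m512 b, __m512 c)
{
    return _mm512_maskz_fmsubadd_ps(k, a, b, c); /* assumed name */
}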
Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
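A sketch of the negated multiply-add form, useful for expressions like c - a*b; masked-off lanes are zeroed. Name assumed:

#include <immintrin.h>

__m512 fnmadd_zeroed(__mmask16 k, __m512 a, __m512 b, __m512 c)
{
    return _mm512_maskz_fnmadd_ps(k, a, b, c); /* -(a*b) + c per lane; assumed name */
}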
Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := c[63:0]
FI
dst[127:64] := c[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := c[63:0]
FI
dst[127:64] := c[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := a[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := a[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
ELSE
    dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := c[31:0]
FI
dst[127:32] := c[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := c[31:0]
FI
dst[127:32] := c[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := a[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := a[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
ELSE
    dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
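For the mask3 scalar forms above, where both the fallback value and the upper lanes come from "c", a sketch with an assumed name:

#include <immintrin.h>

__m128 fnmadd_low_merge_c(__m128 a, __m128 b, __m128 c, __mmask8 k)
{
    return _mm_mask3_fnmadd_ss(a, b, c, k); /* assumed name; keeps c when k[0] == 0 */
}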
Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
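Zero-masked negated multiply-subtract sketch, with an assumed name:

#include <immintrin.h>

__m512d fnmsub_zeroed(__mmask8 k, __m512d a, __m512d b, __m512d c)
{
    return _mm512_maskz_fnmsub_pd(k, a, b, c); /* -(a*b) - c per lane; assumed name */
}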
Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := c[63:0]
FI
dst[127:64] := c[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := c[63:0]
FI
dst[127:64] := c[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := a[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := a[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
ELSE
    dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := c[31:0]
FI
dst[127:32] := c[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := c[31:0]
FI
dst[127:32] := c[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := a[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := a[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
ELSE
    dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
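Scalar zero-masked variant, with an assumed name:

#include <immintrin.h>

__m128d fnmsub_low_zeroed(__mmask8 k, __m128d a, __m128d b, __m128d c)
{
    return _mm_maskz_fnmsub_sd(k, a, b, c); /* low lane: -(a*b) - c, or 0.0 */
}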
Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := a[i+63:i] * b[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := a[i+63:i] * b[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := a[i+31:i] * b[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := a[i+31:i] * b[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
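A sketch of the zero-masked packed multiply, with an assumed name:

#include <immintrin.h>

__m512d mul_zeroed(__mmask8 k, __m512d a, __m512d b)
{
    return _mm512_maskz_mul_pd(k, a, b); /* masked-off lanes become 0.0 */
}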
Description: Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := a[63:0] * b[63:0]
ELSE
    dst[63:0] := src[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := a[63:0] * b[63:0]
ELSE
    dst[63:0] := src[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
IF k[0]
    dst[63:0] := a[63:0] * b[63:0]
ELSE
    dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
Operation:
IF k[0]
    dst[63:0] := a[63:0] * b[63:0]
ELSE
    dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_note]
Operation:
dst[63:0] := a[63:0] * b[63:0]
dst[127:64] := a[127:64]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := a[31:0] * b[31:0]
ELSE
    dst[31:0] := src[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := a[31:0] * b[31:0]
ELSE
    dst[31:0] := src[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
IF k[0]
    dst[31:0] := a[31:0] * b[31:0]
ELSE
    dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
Operation:
IF k[0]
    dst[31:0] := a[31:0] * b[31:0]
ELSE
    dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_note]
Operation:
dst[31:0] := a[31:0] * b[31:0]
dst[127:32] := a[127:32]
dst[MAX:128] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
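Unlike the FMA forms above, which fall back to one of their own inputs, the masked scalar multiply forms take a separate "src" operand for the masked-off low lane. A sketch with an assumed name:

#include <immintrin.h>

__m128 mul_low_merge(__m128 src, __mmask8 k, __m128 a, __m128 b)
{
    return _mm_mask_mul_ss(src, k, a, b); /* low lane from src when k[0] == 0 */
}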
Description: Add packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := a[i+31:i] + b[i+31:i]
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Add packed 64-bit integers in "a" and "b", and store the results in "dst".
Operation:
FOR j := 0 to 7
    i := j*64
    dst[i+63:i] := a[i+63:i] + b[i+63:i]
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Add packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := a[i+63:i] + b[i+63:i]
    ELSE
        dst[i+63:i] := src[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic

Description: Add packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
Operation:
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := a[i+63:i] + b[i+63:i]
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
CPUID: AVX512F | Header: immintrin.h | Category: Arithmetic
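A usage sketch for the packed 64-bit adds, assuming the conventional names for the plain and merge-masked forms:

#include <immintrin.h>

__m512i add64(__m512i a, __m512i b)
{
    return _mm512_add_epi64(a, b);
}

__m512i add64_merge(__m512i src, __mmask8 k, __m512i a, __m512i b)
{
    return _mm512_mask_add_epi64(src, k, a, b); /* keeps src lanes where k is 0 */
}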
- - - - - - - Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+31:i] * b[i+31:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+31:i] * b[i+31:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+31:i] * b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
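Editor's note: per the pseudocode, these multiplies read only the low 32 bits of each 64-bit lane (`a[i+31:i]`) and produce full 64-bit products, signed or unsigned. A short sketch, assuming the conventional names `_mm512_mul_epi32` / `_mm512_mul_epu32`:

```
#include <immintrin.h>

/* Widening multiply: 8 products per vector, one per 64-bit lane,
   each formed from the low 32 bits of that lane in a and b. */
__m512i widening_mul_demo(__m512i a, __m512i b) {
    __m512i s = _mm512_mul_epi32(a, b); /* SignExtend64 * SignExtend64 */
    __m512i u = _mm512_mul_epu32(a, b); /* ZeroExtend64 * ZeroExtend64 */
    return _mm512_sub_epi64(s, u);      /* differs only for negative inputs */
}
```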
- - - - - - Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] - b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - [round_note] - -IF k[0] - dst[63:0] := a[63:0] - b[63:0] -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := a[63:0] - b[63:0] -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - [round_note] - -IF k[0] - dst[63:0] := a[63:0] - b[63:0] -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := a[63:0] - b[63:0] -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - [round_note] - -dst[63:0] := a[63:0] - b[63:0] -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst[31:0] := a[31:0] - b[31:0] -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := a[31:0] - b[31:0] -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst[31:0] := a[31:0] - b[31:0] -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := a[31:0] - b[31:0] -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := a[31:0] - b[31:0] -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
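Editor's note: the `[round_note]` variants above take an immediate rounding override instead of consulting MXCSR. A hedged usage sketch, assuming the scalar subtract is exposed as `_mm_sub_round_ss`:

```
#include <immintrin.h>

/* Scalar subtract with an explicit rounding mode; the upper three
   packed elements are copied from a, per the pseudocode above. */
__m128 scalar_sub_demo(__m128 a, __m128 b) {
    /* round toward zero and suppress floating-point exceptions */
    return _mm_sub_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
```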
- - - - - Store 512-bits (composed of 8 packed 64-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits (composed of 16 packed 32-bit integers) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 16-bit mask from "a" into memory. - -MEM[mem_addr+15:mem_addr] := a[15:0] - - - AVX512F -
immintrin.h
- Store -
- - Swizzle - - - - - Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -size := 64 -m := base_addr -FOR j := 0 to 7 - i := j*64 - IF k[j] - MEM[m+size-1:m] := a[i+63:i] - m := m + size - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - Swizzle - - - - - Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -size := 32 -m := base_addr -FOR j := 0 to 15 - i := j*32 - IF k[j] - MEM[m+size-1:m] := a[i+31:i] - m := m + size - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
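Editor's note: compress-store packs the active lanes contiguously in memory, which makes it a one-call filter step. A sketch assuming the `_mm512_mask_compressstoreu_ps` name together with an AVX512F compare from the same family:

```
#include <immintrin.h>

/* Keep only the positive floats of one vector, written densely to out.
   Returns how many floats were stored. */
int keep_positive(const float *in, float *out) {
    __m512 v = _mm512_loadu_ps(in);
    __mmask16 k = _mm512_cmp_ps_mask(v, _mm512_setzero_ps(), _CMP_GT_OQ);
    _mm512_mask_compressstoreu_ps(out, k, v); /* out need not be aligned */
    return _mm_popcnt_u32((unsigned)k);       /* population count of the mask */
}
```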
- - - - - - Store packed 32-bit integers from "a" into memory using writemask "k". - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits of integer data from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - - Store packed 64-bit integers from "a" into memory using writemask "k". - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits of integer data from "a" into memory using a non-temporal memory hint. - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
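Editor's note: unlike the unaligned stores above, the non-temporal (stream) stores require 64-byte alignment and bypass the cache hierarchy. A usage sketch assuming `_mm512_stream_ps`; the trailing `_mm_sfence` orders the streaming stores against subsequent normal stores:

```
#include <immintrin.h>
#include <stddef.h>

/* Fill a large buffer without polluting the cache.
   Caller guarantees: dst is 64-byte aligned, n is a multiple of 16. */
void fill_stream(float *dst, float value, size_t n) {
    __m512 v = _mm512_set1_ps(value);
    for (size_t i = 0; i < n; i += 16)
        _mm512_stream_ps(dst + i, v); /* non-temporal hint */
    _mm_sfence();                     /* drain the write-combining buffers */
}
```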
- - - - - - Store the lower double-precision (64-bit) floating-point element from "a" into memory using writemask "k". - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -IF k[0] - MEM[mem_addr+63:mem_addr] := a[63:0] -FI - - - AVX512F -
immintrin.h
- Store -
- - - - - - Store the lower single-precision (32-bit) floating-point element from "a" into memory using writemask "k". - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -IF k[0] - MEM[mem_addr+31:mem_addr] := a[31:0] -FI - - - AVX512F -
immintrin.h
- Store -
- - - - - - Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - - Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - Swizzle - - - - - Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -size := 32 -m := base_addr -FOR j := 0 to 15 - i := j*32 - IF k[j] - MEM[m+size-1:m] := a[i+31:i] - m := m + size - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - Swizzle - - - - - Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -size := 64 -m := base_addr -FOR j := 0 to 7 - i := j*64 - IF k[j] - MEM[m+size-1:m] := a[i+63:i] - m := m + size - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - - Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
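Editor's note: a sketch of the unmasked and masked scatter forms just described (names assumed to be `_mm512_i32scatter_epi64` / `_mm512_mask_i32scatter_epi64`). Note that `scale` must be an immediate of 1, 2, 4 or 8:

```
#include <immintrin.h>

/* Eight independent 64-bit stores, addressed by 32-bit indices. */
void scatter_demo(long long *table, __m256i idx, __m512i vals, __mmask8 k) {
    /* unmasked: all 8 elements land at table[idx[j]] (scale 8 = sizeof) */
    _mm512_i32scatter_epi64(table, idx, vals, 8);
    /* masked: lanes with a 0 bit in k leave memory untouched */
    _mm512_mask_i32scatter_epi64(table, k, idx, vals, 8);
}
```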
- - - - - - - Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - - Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - - Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - - Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - Multiplies elements in packed 64-bit integer vectors "a" and "b" together, storing the lower 64 bits of the result in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] * b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiplies elements in packed 64-bit integer vectors "a" and "b" together, storing the lower 64 bits of the result in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] * b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Arithmetic -
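Editor's note: AVX512F has no single instruction for a full 64-bit low multiply (that arrives with AVX512DQ), so the two entries above describe an intrinsic that is assumed to expand to a short instruction sequence, presumably `_mm512_mullox_epi64` and its writemask form. A sketch:

```
#include <immintrin.h>

/* Full 64-bit low multiply, with and without a writemask merge. */
__m512i mullox_demo(__m512i src, __mmask8 k, __m512i a, __m512i b) {
    __m512i all = _mm512_mullox_epi64(a, b);              /* every lane */
    __m512i sel = _mm512_mask_mullox_epi64(src, k, a, b); /* merge under k */
    return _mm512_add_epi64(all, sel);
}
```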
- - - - Load 512-bits (composed of 8 packed 64-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - Load 512-bits (composed of 16 packed 32-bit integers) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - Load 16-bit mask from memory into "k". - -k[15:0] := MEM[mem_addr+15:mem_addr] - - - AVX512F -
immintrin.h
- Load -
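Editor's note: the 16-bit mask store and load above correspond to KMOVW with a memory operand, letting mask registers be spilled and reloaded. A round-trip sketch, assuming the `_store_mask16` / `_load_mask16` names:

```
#include <immintrin.h>

/* Spill a 16-bit mask to memory and read it back. */
__mmask16 roundtrip_mask(__mmask16 k) {
    __mmask16 slot;
    _store_mask16(&slot, k);   /* MEM[..] := k[15:0] */
    return _load_mask16(&slot);
}
```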
- - Swizzle - - - - - Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - Swizzle - - - - Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - Swizzle - - - - - Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - Swizzle - - - - Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
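Editor's note: expand-load is the inverse of compress-store; consecutive values are read from memory and distributed into the lanes selected by "k". A sketch, assuming the zeromask form is `_mm512_maskz_expandloadu_ps`:

```
#include <immintrin.h>

/* Reads popcount(k) contiguous floats from packed and places them in
   the active lanes; unselected lanes become 0. */
__m512 expand_demo(const float *packed, __mmask16 k) {
    return _mm512_maskz_expandloadu_ps(k, packed);
}
```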
- - - - - - Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - - - Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - - - Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - - - Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Load -
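Editor's note: with 64-bit indices, the eight indices already fill a whole __m512i, so only eight floats can be fetched and the result is a 256-bit vector (hence `dst[MAX:256] := 0` above). A sketch, assuming the `_mm512_i64gather_ps` name:

```
#include <immintrin.h>

/* Gather 8 floats addressed by 8 64-bit indices. */
__m256 gather_demo(const float *table, __m512i idx64) {
    return _mm512_i64gather_ps(idx64, table, 4); /* scale 4 = sizeof(float) */
}
```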
- - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - Load 512-bits of integer data from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - Load 512-bits of integer data from memory into "dst" using a non-temporal memory hint. - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and set the upper element of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -IF k[0] - dst[63:0] := MEM[mem_addr+63:mem_addr] -ELSE - dst[63:0] := src[63:0] -FI -dst[MAX:64] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and set the upper element of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -IF k[0] - dst[63:0] := MEM[mem_addr+63:mem_addr] -ELSE - dst[63:0] := 0 -FI -dst[MAX:64] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and set the upper elements of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -IF k[0] - dst[31:0] := MEM[mem_addr+31:mem_addr] -ELSE - dst[31:0] := src[31:0] -FI -dst[MAX:32] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and set the upper elements of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -IF k[0] - dst[31:0] := MEM[mem_addr+31:mem_addr] -ELSE - dst[31:0] := 0 -FI -dst[MAX:32] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - "mem_addr" does not need to be aligned on any particular boundary. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
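Editor's note: the masked unaligned loads above also suppress faults on inactive lanes, which allows a clean vector loop tail with no scalar epilogue. A sketch, assuming `_mm512_maskz_loadu_ps` and the compiler-provided reduction helper:

```
#include <immintrin.h>

/* Sum the last n floats of a buffer, 0 <= n <= 16.
   Masked-out lanes neither fault nor contribute to the sum. */
float sum_tail(const float *p, int n) {
    __mmask16 k = (__mmask16)((1u << n) - 1); /* low n lanes active */
    __m512 v = _mm512_maskz_loadu_ps(k, p);   /* inactive lanes read as 0 */
    return _mm512_reduce_add_ps(v);
}
```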
- - Swizzle - - - - - Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - Swizzle - - - - Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m] - m := m + 32 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - Swizzle - - - - - Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - Swizzle - - - - Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m] - m := m + 64 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - - - Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - - - Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*32 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*64 - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - - - Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 7 - i := j*64 - m := j*64 - IF k[j] - addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - Compute the bitwise AND of 16-bit masks "a" and "b", and store the result in "k". - -k[15:0] := a[15:0] AND b[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise NOT of 16-bit masks "a" and then AND with "b", and store the result in "k". - -k[15:0] := (NOT a[15:0]) AND b[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - Compute the bitwise NOT of 16-bit mask "a", and store the result in "k". - -k[15:0] := NOT a[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 16-bit masks "a" and "b", and store the result in "k". - -k[15:0] := a[15:0] OR b[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise XNOR of 16-bit masks "a" and "b", and store the result in "k". - -k[15:0] := NOT (a[15:0] XOR b[15:0]) -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise XOR of 16-bit masks "a" and "b", and store the result in "k". - -k[15:0] := a[15:0] XOR b[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
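Editor's note: the mask logic above lets predicates be combined without leaving the mask registers; the `_mm512_k*` spellings sketched here and the `_k*_mask16` spellings described further below are assumed to be equivalent. A range test:

```
#include <immintrin.h>

/* Lanes of v that lie in [lo, hi], computed as the AND of two masks. */
__mmask16 in_range_mask(__m512 v, __m512 lo, __m512 hi) {
    __mmask16 ge = _mm512_cmp_ps_mask(v, lo, _CMP_GE_OQ);
    __mmask16 le = _mm512_cmp_ps_mask(v, hi, _CMP_LE_OQ);
    return _mm512_kand(ge, le); /* k := ge AND le */
}
```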
- - - - - Shift the bits of 16-bit mask "a" left by "count" while shifting in zeros, and store the least significant 16 bits of the result in "k". - -k[MAX:0] := 0 -IF count[7:0] <= 15 - k[15:0] := a[15:0] << count[7:0] -FI - - - AVX512F -
immintrin.h
- Mask -
- - - - - Shift the bits of 16-bit mask "a" right by "count" while shifting in zeros, and store the least significant 16 bits of the result in "k". - -k[MAX:0] := 0 -IF count[7:0] <= 15 - k[15:0] := a[15:0] >> count[7:0] -FI - - - AVX512F -
immintrin.h
- Mask -
- - - - - - Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones". - -tmp[15:0] := a[15:0] OR b[15:0] -IF tmp[15:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI -IF tmp[15:0] == 0xFFFF - MEM[all_ones+7:all_ones] := 1 -ELSE - MEM[all_ones+7:all_ones] := 0 -FI - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst". - -tmp[15:0] := a[15:0] OR b[15:0] -IF tmp[15:0] == 0x0 - dst := 1 -ELSE - dst := 0 -FI - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst". - -tmp[15:0] := a[15:0] OR b[15:0] -IF tmp[15:0] == 0xFFFF - dst := 1 -ELSE - dst := 0 -FI - - - AVX512F -
immintrin.h
- Mask -
- - - - Convert 16-bit mask "a" into an integer value, and store the result in "dst". - -dst := ZeroExtend32(a[15:0]) - - - AVX512F -
immintrin.h
- Mask -
- - - - Convert integer value "a" into a 16-bit mask, and store the result in "k". - -k := ZeroExtend16(a[15:0]) - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise NOT of 16-bit masks "a" and then AND with "b", and store the result in "k". - -k[15:0] := (NOT a[15:0]) AND b[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise AND of 16-bit masks "a" and "b", and store the result in "k". - -k[15:0] := a[15:0] AND b[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - Copy 16-bit mask "a" to "k". - -k[15:0] := a[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - Compute the bitwise NOT of 16-bit mask "a", and store the result in "k". - -k[15:0] := NOT a[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise OR of 16-bit masks "a" and "b", and store the result in "k". - -k[15:0] := a[15:0] OR b[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - - Unpack and interleave 8 bits from masks "a" and "b", and store the 16-bit result in "k". - -k[7:0] := b[7:0] -k[15:8] := a[7:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise XNOR of 16-bit masks "a" and "b", and store the result in "k". - -k[15:0] := NOT (a[15:0] XOR b[15:0]) -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - - Compute the bitwise XOR of 16-bit masks "a" and "b", and store the result in "k". - -k[15:0] := a[15:0] XOR b[15:0] -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Mask -
- - - - - Performs bitwise OR between "k1" and "k2", storing the result in "dst". ZF flag is set if "dst" is 0. - dst[15:0] := k1[15:0] | k2[15:0] -IF dst == 0 - SetZF() -FI - - - AVX512F -
immintrin.h
- Mask -
- - - - - Performs bitwise OR between "k1" and "k2", storing the result in "dst". CF flag is set if "dst" consists of all 1's. - dst[15:0] := k1[15:0] | k2[15:0] -IF PopCount(dst[15:0]) == 16 - SetCF() -FI - - - AVX512F -
immintrin.h
- Mask -
- - - - Converts bit mask "k1" into an integer value, storing the result in "dst". - -dst := ZeroExtend32(k1) - - - AVX512F -
immintrin.h
- Mask -
- - - - Converts integer "mask" into a bitmask, storing the result in "dst". - -dst := mask[15:0] - - - AVX512F -
immintrin.h
- Mask -
- - - - - - - Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 64 bytes (16 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -temp[1023:512] := a[511:0] -temp[511:0] := b[511:0] -temp[1023:0] := temp[1023:0] >> (32*imm8[3:0]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := temp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 64 bytes (8 elements) in "dst". - -temp[1023:512] := a[511:0] -temp[511:0] := b[511:0] -temp[1023:0] := temp[1023:0] >> (64*imm8[2:0]) -dst[511:0] := temp[511:0] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - - Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 64 bytes (8 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -temp[1023:512] := a[511:0] -temp[511:0] := b[511:0] -temp[1023:0] := temp[1023:0] >> (64*imm8[2:0]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := temp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 64-bit elements, and store the low 64 bytes (8 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -temp[1023:512] := a[511:0] -temp[511:0] := b[511:0] -temp[1023:0] := temp[1023:0] >> (64*imm8[2:0]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := temp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
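Editor's note: the alignr entries describe a lane-crossing "shift by whole elements" over the concatenation of two vectors, useful for sliding windows. A sketch, assuming the 64-bit form is `_mm512_alignr_epi64`:

```
#include <immintrin.h>

/* temp = cur:prev shifted right by 7 elements, low 512 bits kept:
   dst[0] = prev[7], dst[1..7] = cur[0..6]. */
__m512i shift_in_demo(__m512i prev, __m512i cur) {
    return _mm512_alignr_epi64(cur, prev, 7);
}
```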
- - - - - - - Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. - enum TOKEN_TYPE { - QNAN_TOKEN := 0, \ - SNAN_TOKEN := 1, \ - ZERO_VALUE_TOKEN := 2, \ - ONE_VALUE_TOKEN := 3, \ - NEG_INF_TOKEN := 4, \ - POS_INF_TOKEN := 5, \ - NEG_VALUE_TOKEN := 6, \ - POS_VALUE_TOKEN := 7 -} -DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { - tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] - CASE(tsrc[63:0]) OF - QNAN_TOKEN:j := 0 - SNAN_TOKEN:j := 1 - ZERO_VALUE_TOKEN: j := 2 - ONE_VALUE_TOKEN: j := 3 - NEG_INF_TOKEN: j := 4 - POS_INF_TOKEN: j := 5 - NEG_VALUE_TOKEN: j := 6 - POS_VALUE_TOKEN: j := 7 - ESAC - - token_response[3:0] := src3[3+4*j:4*j] - - CASE(token_response[3:0]) OF - 0 : dest[63:0] := src1[63:0] - 1 : dest[63:0] := tsrc[63:0] - 2 : dest[63:0] := QNaN(tsrc[63:0]) - 3 : dest[63:0] := QNAN_Indefinite - 4 : dest[63:0] := -INF - 5 : dest[63:0] := +INF - 6 : dest[63:0] := tsrc.sign? -INF : +INF - 7 : dest[63:0] := -0 - 8 : dest[63:0] := +0 - 9 : dest[63:0] := -1 - 10: dest[63:0] := +1 - 11: dest[63:0] := 1/2 - 12: dest[63:0] := 90.0 - 13: dest[63:0] := PI/2 - 14: dest[63:0] := MAX_FLOAT - 15: dest[63:0] := -MAX_FLOAT - ESAC - - CASE(tsrc[31:0]) OF - ZERO_VALUE_TOKEN: - IF (imm8[0]) #ZE; FI - ZERO_VALUE_TOKEN: - IF (imm8[1]) #IE; FI - ONE_VALUE_TOKEN: - IF (imm8[2]) #ZE; FI - ONE_VALUE_TOKEN: - IF (imm8[3]) #IE; FI - SNAN_TOKEN: - IF (imm8[4]) #IE; FI - NEG_INF_TOKEN: - IF (imm8[5]) #IE; FI - NEG_VALUE_TOKEN: - IF (imm8[6]) #IE; FI - POS_INF_TOKEN: - IF (imm8[7]) #IE; FI - ESAC - RETURN dest[63:0] -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - - Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. - [sae_note] - enum TOKEN_TYPE { - QNAN_TOKEN := 0, \ - SNAN_TOKEN := 1, \ - ZERO_VALUE_TOKEN := 2, \ - ONE_VALUE_TOKEN := 3, \ - NEG_INF_TOKEN := 4, \ - POS_INF_TOKEN := 5, \ - NEG_VALUE_TOKEN := 6, \ - POS_VALUE_TOKEN := 7 -} -DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) { - tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0] - CASE(tsrc[63:0]) OF - QNAN_TOKEN:j := 0 - SNAN_TOKEN:j := 1 - ZERO_VALUE_TOKEN: j := 2 - ONE_VALUE_TOKEN: j := 3 - NEG_INF_TOKEN: j := 4 - POS_INF_TOKEN: j := 5 - NEG_VALUE_TOKEN: j := 6 - POS_VALUE_TOKEN: j := 7 - ESAC - - token_response[3:0] := src3[3+4*j:4*j] - - CASE(token_response[3:0]) OF - 0 : dest[63:0] := src1[63:0] - 1 : dest[63:0] := tsrc[63:0] - 2 : dest[63:0] := QNaN(tsrc[63:0]) - 3 : dest[63:0] := QNAN_Indefinite - 4 : dest[63:0] := -INF - 5 : dest[63:0] := +INF - 6 : dest[63:0] := tsrc.sign? -INF : +INF - 7 : dest[63:0] := -0 - 8 : dest[63:0] := +0 - 9 : dest[63:0] := -1 - 10: dest[63:0] := +1 - 11: dest[63:0] := 1/2 - 12: dest[63:0] := 90.0 - 13: dest[63:0] := PI/2 - 14: dest[63:0] := MAX_FLOAT - 15: dest[63:0] := -MAX_FLOAT - ESAC - - CASE(tsrc[31:0]) OF - ZERO_VALUE_TOKEN: - IF (imm8[0]) #ZE; FI - ZERO_VALUE_TOKEN: - IF (imm8[1]) #IE; FI - ONE_VALUE_TOKEN: - IF (imm8[2]) #ZE; FI - ONE_VALUE_TOKEN: - IF (imm8[3]) #IE; FI - SNAN_TOKEN: - IF (imm8[4]) #IE; FI - NEG_INF_TOKEN: - IF (imm8[5]) #IE; FI - NEG_VALUE_TOKEN: - IF (imm8[6]) #IE; FI - POS_INF_TOKEN: - IF (imm8[7]) #IE; FI - ESAC - RETURN dest[63:0] -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
AVX512F / immintrin.h / Miscellaneous

Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting. Four variants share the operation below: writemask "k" or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), each available with or without [sae_note].

enum TOKEN_TYPE {
    QNAN_TOKEN := 0, \
    SNAN_TOKEN := 1, \
    ZERO_VALUE_TOKEN := 2, \
    ONE_VALUE_TOKEN := 3, \
    NEG_INF_TOKEN := 4, \
    POS_INF_TOKEN := 5, \
    NEG_VALUE_TOKEN := 6, \
    POS_VALUE_TOKEN := 7
}
DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
    tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
    CASE(tsrc[63:0]) OF
    QNAN_TOKEN: j := 0
    SNAN_TOKEN: j := 1
    ZERO_VALUE_TOKEN: j := 2
    ONE_VALUE_TOKEN: j := 3
    NEG_INF_TOKEN: j := 4
    POS_INF_TOKEN: j := 5
    NEG_VALUE_TOKEN: j := 6
    POS_VALUE_TOKEN: j := 7
    ESAC

    token_response[3:0] := src3[3+4*j:4*j]

    CASE(token_response[3:0]) OF
    0 : dest[63:0] := src1[63:0]
    1 : dest[63:0] := tsrc[63:0]
    2 : dest[63:0] := QNaN(tsrc[63:0])
    3 : dest[63:0] := QNAN_Indefinite
    4 : dest[63:0] := -INF
    5 : dest[63:0] := +INF
    6 : dest[63:0] := tsrc.sign? -INF : +INF
    7 : dest[63:0] := -0
    8 : dest[63:0] := +0
    9 : dest[63:0] := -1
    10: dest[63:0] := +1
    11: dest[63:0] := 1/2
    12: dest[63:0] := 90.0
    13: dest[63:0] := PI/2
    14: dest[63:0] := MAX_FLOAT
    15: dest[63:0] := -MAX_FLOAT
    ESAC

    CASE(tsrc[63:0]) OF
    ZERO_VALUE_TOKEN:
        IF (imm8[0]) #ZE; FI
    ZERO_VALUE_TOKEN:
        IF (imm8[1]) #IE; FI
    ONE_VALUE_TOKEN:
        IF (imm8[2]) #ZE; FI
    ONE_VALUE_TOKEN:
        IF (imm8[3]) #IE; FI
    SNAN_TOKEN:
        IF (imm8[4]) #IE; FI
    NEG_INF_TOKEN:
        IF (imm8[5]) #IE; FI
    NEG_VALUE_TOKEN:
        IF (imm8[6]) #IE; FI
    POS_INF_TOKEN:
        IF (imm8[7]) #IE; FI
    ESAC
    RETURN dest[63:0]
}
FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
    ELSE
        dst[i+63:i] := a[i+63:i]
    FI
ENDFOR
dst[MAX:512] := 0

The zeromask variants differ only in the ELSE branch, which becomes dst[i+63:i] := 0.
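The intrinsic names were not preserved in the entries above, so the sketch below assumes the conventional immintrin.h writemask form, _mm512_mask_fixupimm_pd, built with AVX-512F enabled (e.g. -mavx512f). It encodes the per-token fixup table in "c" so that every NaN becomes +0.0 while all other values pass through unchanged.

#include <immintrin.h>
#include <math.h>
#include <stdio.h>

int main(void) {
    /* One 4-bit response per TOKEN_TYPE: 1 = pass the classified value
       through, 8 = +0.0. Nibbles 0 and 1 cover QNAN_TOKEN and SNAN_TOKEN,
       so NaNs are rewritten and everything else is left alone. */
    __m512i c = _mm512_set1_epi64(0x11111188);
    __m512d b = _mm512_set_pd(8.0, 7.0, 6.0, 5.0, 4.0, NAN, -2.0, 1.0);
    __m512d a = _mm512_setzero_pd();             /* fallback for response 0 and masked-off lanes */
    __m512d r = _mm512_mask_fixupimm_pd(a, 0xFF, b, c, 0);  /* imm8 = 0: no flag reporting */

    double out[8];
    _mm512_storeu_pd(out, r);
    for (int i = 0; i < 8; i++)
        printf("%g ", out[i]);   /* prints: 1 -2 0 4 5 6 7 8 */
    printf("\n");
    return 0;
}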
AVX512F / immintrin.h / Miscellaneous

Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting. Six variants share the operation below: unmasked, writemask "k" (elements are copied from "a" when the corresponding mask bit is not set), or zeromask "k" (elements are zeroed out when the corresponding mask bit is not set), each available with or without [sae_note]. enum TOKEN_TYPE is as defined for the packed double-precision variants above.

DEFINE FIXUPIMMPS(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
    tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
    CASE(tsrc[31:0]) OF
    QNAN_TOKEN: j := 0
    SNAN_TOKEN: j := 1
    ZERO_VALUE_TOKEN: j := 2
    ONE_VALUE_TOKEN: j := 3
    NEG_INF_TOKEN: j := 4
    POS_INF_TOKEN: j := 5
    NEG_VALUE_TOKEN: j := 6
    POS_VALUE_TOKEN: j := 7
    ESAC

    token_response[3:0] := src3[3+4*j:4*j]

    CASE(token_response[3:0]) OF
    0 : dest[31:0] := src1[31:0]
    1 : dest[31:0] := tsrc[31:0]
    2 : dest[31:0] := QNaN(tsrc[31:0])
    3 : dest[31:0] := QNAN_Indefinite
    4 : dest[31:0] := -INF
    5 : dest[31:0] := +INF
    6 : dest[31:0] := tsrc.sign? -INF : +INF
    7 : dest[31:0] := -0
    8 : dest[31:0] := +0
    9 : dest[31:0] := -1
    10: dest[31:0] := +1
    11: dest[31:0] := 1/2
    12: dest[31:0] := 90.0
    13: dest[31:0] := PI/2
    14: dest[31:0] := MAX_FLOAT
    15: dest[31:0] := -MAX_FLOAT
    ESAC

    CASE(tsrc[31:0]) OF
    ZERO_VALUE_TOKEN:
        IF (imm8[0]) #ZE; FI
    ZERO_VALUE_TOKEN:
        IF (imm8[1]) #IE; FI
    ONE_VALUE_TOKEN:
        IF (imm8[2]) #ZE; FI
    ONE_VALUE_TOKEN:
        IF (imm8[3]) #IE; FI
    SNAN_TOKEN:
        IF (imm8[4]) #IE; FI
    NEG_INF_TOKEN:
        IF (imm8[5]) #IE; FI
    NEG_VALUE_TOKEN:
        IF (imm8[6]) #IE; FI
    POS_INF_TOKEN:
        IF (imm8[7]) #IE; FI
    ESAC
    RETURN dest[31:0]
}
FOR j := 0 to 15
    i := j*32
    dst[i+31:i] := FIXUPIMMPS(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
ENDFOR
dst[MAX:512] := 0

The masked variants perform the per-element assignment only when k[j] is set; otherwise the writemask variants copy a[i+31:i] and the zeromask variants write 0.
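A minimal sketch of the zeromask single-precision form, assuming the usual immintrin.h name _mm512_maskz_fixupimm_ps: the table saturates infinities to the largest finite float (responses 15 and 14 are -MAX_FLOAT and +MAX_FLOAT) and passes everything else through.

#include <immintrin.h>
#include <math.h>
#include <stdio.h>

int main(void) {
    /* Nibble 4 (NEG_INF_TOKEN) -> response 15 = -MAX_FLOAT,
       nibble 5 (POS_INF_TOKEN) -> response 14 = +MAX_FLOAT,
       all other tokens -> response 1 (pass through). */
    __m512i c = _mm512_set1_epi32(0x11EF1111);
    __m512  b = _mm512_set1_ps(INFINITY);
    __m512  r = _mm512_maskz_fixupimm_ps(0xFFFF, _mm512_setzero_ps(), b, c, 0);

    float out[16];
    _mm512_storeu_ps(out, r);
    printf("%g\n", out[0]);   /* prints 3.40282e+38 (FLT_MAX) */
    return 0;
}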
AVX512F / immintrin.h / Miscellaneous

Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst", and copy the upper element from "b" to the upper element of "dst". "imm8" is used to set the required flags reporting. Six variants: unmasked, writemask "k" (the lower element is copied from "a" when mask bit 0 is not set), or zeromask "k" (the lower element is zeroed out when mask bit 0 is not set), each available with or without [sae_note]. enum TOKEN_TYPE and DEFINE FIXUPIMMPD are as given for the packed double-precision variants above.

dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0])
dst[127:64] := b[127:64]
dst[MAX:128] := 0

The masked variants guard the fixup of the lower element:

IF k[0]
    dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0])
ELSE
    dst[63:0] := a[63:0]
FI
dst[127:64] := b[127:64]
dst[MAX:128] := 0

with dst[63:0] := 0 in the ELSE branch for the zeromask variants.
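Assuming the customary scalar name _mm_fixupimm_sd from immintrin.h, this sketch shows response 0 (fall back to the corresponding element of "a") and the upper lane of the result being taken from "b":

#include <immintrin.h>
#include <math.h>
#include <stdio.h>

int main(void) {
    /* Nibble 0 (QNAN_TOKEN) -> response 0: take the value from "a".
       All other tokens -> response 1 (pass "b" through). */
    __m128i c = _mm_set1_epi64x(0x11111110);
    __m128d a = _mm_set_pd(7.0, -1.5);   /* high = 7.0, low = -1.5 (fallback) */
    __m128d b = _mm_set_pd(42.0, NAN);   /* high = 42.0, low = NaN */
    __m128d r = _mm_fixupimm_sd(a, b, c, 0);
    /* low lane: NaN replaced by a's low; high lane: b's high, not a's */
    printf("low=%g high=%g\n", _mm_cvtsd_f64(r),
           _mm_cvtsd_f64(_mm_unpackhi_pd(r, r)));   /* low=-1.5 high=42 */
    return 0;
}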
AVX512F / immintrin.h / Miscellaneous

Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst", and copy the upper 3 packed elements from "b" to the upper elements of "dst". "imm8" is used to set the required flags reporting. Six variants: unmasked, writemask "k" (the lower element is copied from "a" when mask bit 0 is not set), or zeromask "k" (the lower element is zeroed out when mask bit 0 is not set), each available with or without [sae_note]. enum TOKEN_TYPE and DEFINE FIXUPIMMPS are as given for the packed single-precision variants above.

dst[31:0] := FIXUPIMMPS(a[31:0], b[31:0], c[31:0], imm8[7:0])
dst[127:32] := b[127:32]
dst[MAX:128] := 0

The masked variants guard the fixup of the lower element:

IF k[0]
    dst[31:0] := FIXUPIMMPS(a[31:0], b[31:0], c[31:0], imm8[7:0])
ELSE
    dst[31:0] := a[31:0]
FI
dst[127:32] := b[127:32]
dst[MAX:128] := 0

with dst[31:0] := 0 in the ELSE branch for the zeromask variants.
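A sketch of the flags-reporting side, assuming the usual immintrin.h name _mm_fixupimm_ss: with imm8 bit 0 set, classifying a zero input raises #ZE, and with exceptions masked (the MXCSR default) that only sets the sticky zero-divide flag rather than trapping.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* All eight token responses are 1 (pass through); imm8 = 0x01 requests
       the #ZE flag when the classified input is ZERO_VALUE_TOKEN. */
    __m128i c = _mm_set1_epi32(0x11111111);
    __m128  a = _mm_setzero_ps();
    __m128  b = _mm_set_ss(0.0f);
    __m128  r = _mm_fixupimm_ss(a, b, c, 0x01);
    (void)r;
    printf("ZE flag: %s\n",
           (_mm_getcsr() & _MM_EXCEPT_DIV_ZERO) ? "set" : "clear");  /* expected: set */
    return 0;
}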
AVX512F / immintrin.h / Miscellaneous

Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. Each width has a variant that additionally takes [sae_note].

FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := ConvertExpFP64(a[i+63:i])
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0

The single-precision (32-bit) counterparts are identical except that they process sixteen 32-bit elements:

FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := ConvertExpFP32(a[i+31:i])
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
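Assuming the conventional immintrin.h name _mm512_maskz_getexp_pd, a short sketch of the zeromask behaviour:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set_pd(256.0, 64.0, 8.0, 4.0, 2.0, 1.0, 0.5, 0.25);
    /* Mask 0x0F: only the low four elements are computed, the rest are zeroed. */
    __m512d r = _mm512_maskz_getexp_pd(0x0F, a);
    double out[8];
    _mm512_storeu_pd(out, r);
    for (int i = 0; i < 8; i++)
        printf("%g ", out[i]);   /* prints: -2 -1 0 1 0 0 0 0 */
    printf("\n");
    return 0;
}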
AVX512F / immintrin.h / Miscellaneous

Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. Six variants: unmasked, writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or zeromask "k" (the lower element is zeroed out when mask bit 0 is not set), each available with or without [sae_note].

dst[63:0] := ConvertExpFP64(b[63:0])
dst[127:64] := a[127:64]
dst[MAX:128] := 0

The masked variants guard the conversion of the lower element:

IF k[0]
    dst[63:0] := ConvertExpFP64(b[63:0])
ELSE
    dst[63:0] := src[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

with dst[63:0] := 0 in the ELSE branch for the zeromask variants.
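A sketch assuming the usual scalar name _mm_getexp_sd: the exponent is taken from the low lane of "b", and the upper lane of the result comes from "a".

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_pd(99.0, 1.0);   /* upper lane of the result comes from a */
    __m128d b = _mm_set_pd(0.0, 48.0);   /* 48 = 1.5 * 2^5, so getexp = 5 */
    __m128d r = _mm_getexp_sd(a, b);
    printf("low=%g high=%g\n", _mm_cvtsd_f64(r),
           _mm_cvtsd_f64(_mm_unpackhi_pd(r, r)));   /* low=5 high=99 */
    return 0;
}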
AVX512F / immintrin.h / Miscellaneous

Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. Six variants: unmasked, writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or zeromask "k" (the lower element is zeroed out when mask bit 0 is not set), each available with or without [sae_note].

dst[31:0] := ConvertExpFP32(b[31:0])
dst[127:32] := a[127:32]
dst[MAX:128] := 0

The masked variants guard the conversion of the lower element:

IF k[0]
    dst[31:0] := ConvertExpFP32(b[31:0])
ELSE
    dst[31:0] := src[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

with dst[31:0] := 0 in the ELSE branch for the zeromask variants.
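Assuming the customary masked scalar name _mm_mask_getexp_ss, this sketch contrasts mask bit 0 clear (the low lane is taken from "src") with mask bit 0 set:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 src = _mm_set_ss(-123.0f);  /* used when mask bit 0 is clear */
    __m128 a   = _mm_set1_ps(7.0f);
    __m128 b   = _mm_set_ss(32.0f);    /* 32 = 2^5 */
    __m128 r0  = _mm_mask_getexp_ss(src, 0, a, b);  /* low = src = -123 */
    __m128 r1  = _mm_mask_getexp_ss(src, 1, a, b);  /* low = getexp(32) = 5 */
    printf("%g %g\n", _mm_cvtss_f32(r0), _mm_cvtss_f32(r1));   /* -123 5 */
    return 0;
}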
AVX512F / immintrin.h / Miscellaneous

Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. [getmant_note] Each width has a variant that additionally takes [sae_note].

FOR j := 0 to 7
    i := j*64
    IF k[j]
        dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
    ELSE
        dst[i+63:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0

The single-precision (32-bit) counterparts are identical except that they process sixteen 32-bit elements:

FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0
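Combined with getexp, getmant gives a frexp-style decomposition x = mant * 2^exp. This sketch assumes the usual immintrin.h names _mm512_maskz_getmant_pd, _mm512_maskz_getexp_pd, and _mm512_abs_pd, together with the standard _MM_MANT_* enum constants.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* Normalize the mantissa into [1, 2) and keep the source sign. */
    __m512d x    = _mm512_set1_pd(-48.0);
    __m512d mant = _mm512_maskz_getmant_pd(0xFF, x, _MM_MANT_NORM_1_2,
                                           _MM_MANT_SIGN_src);
    __m512d exp  = _mm512_maskz_getexp_pd(0xFF, _mm512_abs_pd(x));  /* abs for clarity */
    double m[8], e[8];
    _mm512_storeu_pd(m, mant);
    _mm512_storeu_pd(e, exp);
    printf("%g * 2^%g\n", m[0], e[0]);   /* -1.5 * 2^5 */
    return 0;
}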
AVX512F / immintrin.h / Miscellaneous

Normalize the mantissas of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. [getmant_note] Six variants: unmasked, writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or zeromask "k" (the lower element is zeroed out when mask bit 0 is not set), each available with or without [sae_note].

dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
dst[127:64] := a[127:64]
dst[MAX:128] := 0

The masked variants guard the normalization of the lower element:

IF k[0]
    dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
ELSE
    dst[63:0] := src[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

with dst[63:0] := 0 in the ELSE branch for the zeromask variants.
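Assuming the usual scalar name _mm_getmant_sd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_pd(9.0, 0.0);   /* upper lane of the result comes from a */
    __m128d b = _mm_set_sd(80.0);       /* 80 = 1.25 * 2^6 */
    __m128d r = _mm_getmant_sd(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
    printf("low=%g high=%g\n", _mm_cvtsd_f64(r),
           _mm_cvtsd_f64(_mm_unpackhi_pd(r, r)));   /* low=1.25 high=9 */
    return 0;
}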
AVX512F / immintrin.h / Miscellaneous

Normalize the mantissas of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. [getmant_note] Six variants: unmasked, writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or zeromask "k" (the lower element is zeroed out when mask bit 0 is not set), each available with or without [sae_note].

dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
dst[127:32] := a[127:32]
dst[MAX:128] := 0

The masked variants guard the normalization of the lower element:

IF k[0]
    dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
ELSE
    dst[31:0] := src[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

with dst[31:0] := 0 in the ELSE branch for the zeromask variants.
- Miscellaneous -
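A minimal usage sketch for the scalar getmant forms above. The spellings _mm_getmant_sd, _MM_MANT_NORM_1_2 and _MM_MANT_SIGN_src are the usual immintrin.h names for this operation; treat them as assumptions here, since the entries above do not show the C prototypes.

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128d a = _mm_set_sd(0.0);   /* supplies the upper element of the result */
        __m128d b = _mm_set_sd(-24.0); /* value whose mantissa is normalized */
        /* Normalize |significand| into [1, 2), keeping the source sign. */
        __m128d m = _mm_getmant_sd(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
        printf("%f\n", _mm_cvtsd_f64(m)); /* -24 = -1.5 * 2^4, so this prints -1.5 */
        return 0;
    }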
Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Requires AVX512F; declared in immintrin.h; categorized as Miscellaneous.

    DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
        count := count_src % 32
        RETURN (src >> count) OR (src << (32 - count))
    }
    FOR j := 0 to 15
        i := j*32
        IF k[j]
            dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
        ELSE
            dst[i+31:i] := 0
        FI
    ENDFOR
    dst[MAX:512] := 0
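A short sketch of how this zero-masked variable rotate is typically called; the name _mm512_maskz_rorv_epi32 is the assumed immintrin.h spelling of the operation above, and rotate_even_lanes is an illustrative helper name.

    #include <immintrin.h>

    /* Rotate only the even lanes right by their per-lane counts; odd
       lanes are zeroed out by the 0x5555 zeromask. */
    __m512i rotate_even_lanes(__m512i v, __m512i counts) {
        return _mm512_maskz_rorv_epi32(0x5555, v, counts);
    }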
The packed roundscale intrinsics below all require AVX512F, are declared in immintrin.h, and are categorized as Miscellaneous. They share the following helpers, in which imm8[7:4] gives the number of fraction bits to preserve and imm8[3:0] selects the rounding mode:

    DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
        m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
        tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
        IF IsInf(tmp[63:0])
            tmp[63:0] := src1[63:0]
        FI
        RETURN tmp[63:0]
    }
    DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
        m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
        tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
        IF IsInf(tmp[31:0])
            tmp[31:0] := src1[31:0]
        FI
        RETURN tmp[31:0]
    }

Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] (also available in a form that additionally accepts [sae_note])

    FOR j := 0 to 7
        i := j*64
        IF k[j]
            dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
        ELSE
            dst[i+63:i] := src[i+63:i]
        FI
    ENDFOR
    dst[MAX:512] := 0

Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] (also available in a form that additionally accepts [sae_note])

    FOR j := 0 to 7
        i := j*64
        IF k[j]
            dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
        ELSE
            dst[i+63:i] := 0
        FI
    ENDFOR
    dst[MAX:512] := 0

Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] (also available in a form that additionally accepts [sae_note])

    FOR j := 0 to 7
        i := j*64
        dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
    ENDFOR
    dst[MAX:512] := 0

Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] (also available in a form that additionally accepts [sae_note])

    FOR j := 0 to 15
        i := j*32
        IF k[j]
            dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
        ELSE
            dst[i+31:i] := src[i+31:i]
        FI
    ENDFOR
    dst[MAX:512] := 0

Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] (also available in a form that additionally accepts [sae_note])

    FOR j := 0 to 15
        i := j*32
        IF k[j]
            dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
        ELSE
            dst[i+31:i] := 0
        FI
    ENDFOR
    dst[MAX:512] := 0

Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] (also available in a form that additionally accepts [sae_note])

    FOR j := 0 to 15
        i := j*32
        dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
    ENDFOR
    dst[MAX:512] := 0
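A sketch of the packed form in use, assuming the immintrin.h spelling _mm512_roundscale_pd; per the pseudocode above, imm8[7:4] is the number of fraction bits M to keep and imm8[3:0] selects the rounding mode. quantize_quarters is an illustrative name.

    #include <immintrin.h>

    /* Round every element to the nearest multiple of 2^-2, i.e. keep
       two fraction bits (M = 2, round-to-nearest). */
    __m512d quantize_quarters(__m512d v) {
        return _mm512_roundscale_pd(v, (2 << 4) | _MM_FROUND_TO_NEAREST_INT);
    }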
The scalar roundscale intrinsics below (AVX512F, immintrin.h, Miscellaneous) use RoundScaleFP64 and RoundScaleFP32 as defined above.

Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] (also available in a form that additionally accepts [sae_note])

    IF k[0]
        dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0])
    ELSE
        dst[63:0] := src[63:0]
    FI
    dst[127:64] := a[127:64]
    dst[MAX:128] := 0

Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note] (also available in a form that additionally accepts [sae_note])

    IF k[0]
        dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0])
    ELSE
        dst[63:0] := 0
    FI
    dst[127:64] := a[127:64]
    dst[MAX:128] := 0

Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note] (also available in a form that additionally accepts [sae_note])

    dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0])
    dst[127:64] := a[127:64]
    dst[MAX:128] := 0

Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] (also available in a form that additionally accepts [sae_note])

    IF k[0]
        dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0])
    ELSE
        dst[31:0] := src[31:0]
    FI
    dst[127:32] := a[127:32]
    dst[MAX:128] := 0

Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] (also available in a form that additionally accepts [sae_note])

    IF k[0]
        dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0])
    ELSE
        dst[31:0] := 0
    FI
    dst[127:32] := a[127:32]
    dst[MAX:128] := 0

Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note] (also available in a form that additionally accepts [sae_note])

    dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0])
    dst[127:32] := a[127:32]
    dst[MAX:128] := 0
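The scalar forms follow the same imm8 encoding; a sketch assuming the immintrin.h spelling _mm_roundscale_sd, with trunc_sixteenths as an illustrative name.

    #include <immintrin.h>

    /* Truncate the low element of "b" toward zero on a 2^-4 grid
       (M = 4); the upper element of the result comes from "a". */
    __m128d trunc_sixteenths(__m128d a, __m128d b) {
        return _mm_roundscale_sd(a, b, (4 << 4) | _MM_FROUND_TO_ZERO);
    }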
The packed scalef intrinsics below all require AVX512F, are declared in immintrin.h, and are categorized as Miscellaneous. They scale each element of "a" by 2 raised to the floor of the corresponding element of "b", using the following helper (shown with 64-bit element accesses; the single-precision forms perform the same computation on bits [31:0]):

    DEFINE SCALE(src1, src2) {
        IF (src2 == NaN)
            IF (src2 == SNaN)
                RETURN QNAN(src2)
            FI
        ELSE IF (src1 == NaN)
            IF (src1 == SNaN)
                RETURN QNAN(src1)
            FI
            IF (src2 != INF)
                RETURN QNAN(src1)
            FI
        ELSE
            tmp_src2 := src2
            tmp_src1 := src1
            IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
                tmp_src2 := 0
            FI
            IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
                tmp_src1 := 0
            FI
        FI
        dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
        RETURN dst[63:0]
    }

Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). (also available in a form that additionally accepts [round_note])

    FOR j := 0 to 7
        i := j*64
        IF k[j]
            dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
        ELSE
            dst[i+63:i] := src[i+63:i]
        FI
    ENDFOR
    dst[MAX:512] := 0

Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). (also available in a form that additionally accepts [round_note])

    FOR j := 0 to 7
        i := j*64
        IF k[j]
            dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
        ELSE
            dst[i+63:i] := 0
        FI
    ENDFOR
    dst[MAX:512] := 0

Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst". (also available in a form that additionally accepts [round_note])

    FOR j := 0 to 7
        i := j*64
        dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
    ENDFOR
    dst[MAX:512] := 0

Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). (also available in a form that additionally accepts [round_note])

    FOR j := 0 to 15
        i := j*32
        IF k[j]
            dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
        ELSE
            dst[i+31:i] := src[i+31:i]
        FI
    ENDFOR
    dst[MAX:512] := 0

Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). (also available in a form that additionally accepts [round_note])

    FOR j := 0 to 15
        i := j*32
        IF k[j]
            dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
        ELSE
            dst[i+31:i] := 0
        FI
    ENDFOR
    dst[MAX:512] := 0

Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst". (also available in a form that additionally accepts [round_note])

    FOR j := 0 to 15
        i := j*32
        dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
    ENDFOR
    dst[MAX:512] := 0
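A sketch of the packed scalef form as a vectorized ldexp, assuming the immintrin.h spelling _mm512_scalef_pd; vec_ldexp is an illustrative helper name.

    #include <immintrin.h>

    /* Multiply each element of "v" by 2^floor(e), with the NaN/Inf and
       DAZ handling described by SCALE above. */
    __m512d vec_ldexp(__m512d v, __m512d e) {
        return _mm512_scalef_pd(v, e);
    }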
The scalar scalef intrinsics below (AVX512F, immintrin.h, Miscellaneous) use SCALE as defined above, applied to the low element only.

Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". (also available in a form that additionally accepts [round_note])

    IF k[0]
        dst[63:0] := SCALE(a[63:0], b[63:0])
    ELSE
        dst[63:0] := src[63:0]
    FI
    dst[127:64] := a[127:64]
    dst[MAX:128] := 0

Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". (also available in a form that additionally accepts [round_note])

    IF k[0]
        dst[63:0] := SCALE(a[63:0], b[63:0])
    ELSE
        dst[63:0] := 0
    FI
    dst[127:64] := a[127:64]
    dst[MAX:128] := 0

Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". (also available in a form that additionally accepts [round_note])

    dst[63:0] := SCALE(a[63:0], b[63:0])
    dst[127:64] := a[127:64]
    dst[MAX:128] := 0

Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". (also available in a form that additionally accepts [round_note])

    IF k[0]
        dst[31:0] := SCALE(a[31:0], b[31:0])
    ELSE
        dst[31:0] := src[31:0]
    FI
    dst[127:32] := a[127:32]
    dst[MAX:128] := 0

Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". (also available in a form that additionally accepts [round_note])

    IF k[0]
        dst[31:0] := SCALE(a[31:0], b[31:0])
    ELSE
        dst[31:0] := 0
    FI
    dst[127:32] := a[127:32]
    dst[MAX:128] := 0

Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". (also available in a form that additionally accepts [round_note])

    dst[31:0] := SCALE(a[31:0], b[31:0])
    dst[127:32] := a[127:32]
    dst[MAX:128] := 0
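The scalar single-precision form (assumed immintrin.h spelling _mm_scalef_ss) scales only the low lane; scale_low is an illustrative name.

    #include <immintrin.h>

    /* dst[31:0] = SCALE(a[31:0], b[31:0]); the upper three lanes of the
       result are copied from "a", as in the definition above. */
    __m128 scale_low(__m128 a, __m128 b) {
        return _mm_scalef_ss(a, b);
    }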
The x4 broadcast intrinsics below all require AVX512F, are declared in immintrin.h, and are categorized as Swizzle. Each exists in three forms: unmasked (shown), with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); the masked forms wrap the per-element assignment in the usual IF k[j] ... ELSE ... FI.

Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst".

    FOR j := 0 to 15
        i := j*32
        n := (j % 4)*32
        dst[i+31:i] := a[n+31:n]
    ENDFOR
    dst[MAX:512] := 0

Broadcast the 4 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst".

    FOR j := 0 to 7
        i := j*64
        n := (j % 4)*64
        dst[i+63:i] := a[n+63:n]
    ENDFOR
    dst[MAX:512] := 0

Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst"; the operation is identical to the single-precision form above. Broadcast the 4 packed 64-bit integers from "a" to all elements of "dst"; the operation is identical to the double-precision form above.
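A sketch of the unmasked f32x4 broadcast, assuming the immintrin.h spelling _mm512_broadcast_f32x4; the RGBA-gain framing and tile_rgba_gains name are illustrative.

    #include <immintrin.h>

    /* Tile four per-channel gains across all 16 lanes so that four
       RGBA pixels can be scaled at once. */
    __m512 tile_rgba_gains(const float gains[4]) {
        __m128 g = _mm_loadu_ps(gains);
        return _mm512_broadcast_f32x4(g);
    }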
The low-element broadcasts below (AVX512F, immintrin.h, Swizzle) follow the same three-form pattern: unmasked (shown), writemask "k" (inactive elements copied from "src"), and zeromask "k" (inactive elements zeroed out).

Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst".

    FOR j := 0 to 7
        i := j*64
        dst[i+63:i] := a[63:0]
    ENDFOR
    dst[MAX:512] := 0

Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst".

    FOR j := 0 to 15
        i := j*32
        dst[i+31:i] := a[31:0]
    ENDFOR
    dst[MAX:512] := 0
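A one-liner sketch assuming the immintrin.h spelling _mm512_broadcastsd_pd (when starting from a plain double rather than a vector, _mm512_set1_pd achieves the same splat).

    #include <immintrin.h>

    __m512d splat_low(__m128d x) {
        return _mm512_broadcastsd_pd(x); /* x[63:0] repeated in all 8 lanes */
    }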
The compress intrinsics below (AVX512F, immintrin.h, Swizzle) left-pack the active elements of "a".

Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".

    size := 64
    m := 0
    FOR j := 0 to 7
        i := j*64
        IF k[j]
            dst[m+size-1:m] := a[i+63:i]
            m := m + size
        FI
    ENDFOR
    dst[511:m] := src[511:m]
    dst[MAX:512] := 0

The zeromask form sets the remaining elements to zero instead (dst[511:m] := 0). The single-precision forms of both are identical except that size := 32, j runs from 0 to 15, and the element accesses are 32 bits wide.
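A common use of the zero-filling compress form is stream compaction: keep only the elements passing a predicate, packed to the front. A sketch assuming the immintrin.h spellings _mm512_cmp_ps_mask, _mm512_maskz_compress_ps and _mm_popcnt_u32; keep_above is an illustrative name.

    #include <immintrin.h>

    /* Pack all elements of "v" greater than "threshold" into the low
       lanes, zero the rest, and report how many were kept. */
    __m512 keep_above(__m512 v, float threshold, int *count) {
        __mmask16 k = _mm512_cmp_ps_mask(v, _mm512_set1_ps(threshold), _CMP_GT_OQ);
        *count = _mm_popcnt_u32((unsigned)k);
        return _mm512_maskz_compress_ps(k, v);
    }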
The expand intrinsics below (AVX512F, immintrin.h, Swizzle) perform the inverse operation, scattering contiguous source elements into the active lanes.

Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).

    m := 0
    FOR j := 0 to 7
        i := j*64
        IF k[j]
            dst[i+63:i] := a[m+63:m]
            m := m + 64
        ELSE
            dst[i+63:i] := src[i+63:i]
        FI
    ENDFOR
    dst[MAX:512] := 0

The zeromask form zeroes the inactive elements instead. The single-precision forms of both are identical except that j runs from 0 to 15 and the element accesses and stride are 32 bits.
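A sketch of the zero-filling expand, assuming the immintrin.h spelling _mm512_maskz_expand_ps; scatter_to_lanes is an illustrative name.

    #include <immintrin.h>

    /* Consecutive low elements of "packed" land in the set bits of
       "lanes"; all other lanes become zero. */
    __m512 scatter_to_lanes(__m512 packed, __mmask16 lanes) {
        return _mm512_maskz_expand_ps(lanes, packed);
    }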
The extract intrinsics below (AVX512F, immintrin.h, Swizzle) pull a 128-bit or 256-bit slice out of a 512-bit vector.

Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".

    CASE imm8[1:0] OF
    0: dst[127:0] := a[127:0]
    1: dst[127:0] := a[255:128]
    2: dst[127:0] := a[383:256]
    3: dst[127:0] := a[511:384]
    ESAC
    dst[MAX:128] := 0

In the writemask and zeromask forms, the selected 128 bits land in a temporary "tmp", and each 32-bit element of "dst" is taken from "tmp" when its mask bit is set, and from "src" (writemask) or zero (zeromask) otherwise.

Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".

    CASE imm8[0] OF
    0: dst[255:0] := a[255:0]
    1: dst[255:0] := a[511:256]
    ESAC
    dst[MAX:256] := 0

Its writemask and zeromask forms follow the same pattern per 64-bit element. The 128-bit (4 packed 32-bit integers) and 256-bit (4 packed 64-bit integers) integer extracts are bit-for-bit identical to the floating-point forms above, including their masked variants.
- - - - - - Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". - -dst[511:0] := a[511:0] -CASE (imm8[1:0]) OF -0: dst[127:0] := b[127:0] -1: dst[255:128] := b[127:0] -2: dst[383:256] := b[127:0] -3: dst[511:384] := b[127:0] -ESAC -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[1:0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -2: tmp[383:256] := b[127:0] -3: tmp[511:384] := b[127:0] -ESAC -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[1:0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -2: tmp[383:256] := b[127:0] -3: tmp[511:384] := b[127:0] -ESAC -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8". - -dst[511:0] := a[511:0] -CASE (imm8[0]) OF -0: dst[255:0] := b[255:0] -1: dst[511:256] := b[255:0] -ESAC -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - - Copy "a" to "tmp", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[0]) OF -0: tmp[255:0] := b[255:0] -1: tmp[511:256] := b[255:0] -ESAC -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Copy "a" to "tmp", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[0]) OF -0: tmp[255:0] := b[255:0] -1: tmp[511:256] := b[255:0] -ESAC -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8". - -dst[511:0] := a[511:0] -CASE (imm8[1:0]) OF -0: dst[127:0] := b[127:0] -1: dst[255:128] := b[127:0] -2: dst[383:256] := b[127:0] -3: dst[511:384] := b[127:0] -ESAC -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[1:0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -2: tmp[383:256] := b[127:0] -3: tmp[511:384] := b[127:0] -ESAC -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[1:0]) OF -0: tmp[127:0] := b[127:0] -1: tmp[255:128] := b[127:0] -2: tmp[383:256] := b[127:0] -3: tmp[511:384] := b[127:0] -ESAC -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8". - -dst[511:0] := a[511:0] -CASE (imm8[0]) OF -0: dst[255:0] := b[255:0] -1: dst[511:256] := b[255:0] -ESAC -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - - Copy "a" to "tmp", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[0]) OF -0: tmp[255:0] := b[255:0] -1: tmp[511:256] := b[255:0] -ESAC -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Copy "a" to "tmp", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8". Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[511:0] := a[511:0] -CASE (imm8[0]) OF -0: tmp[255:0] := b[255:0] -1: tmp[511:256] := b[255:0] -ESAC -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - Broadcast the low packed 32-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[31:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - Broadcast the low packed 64-bit integer from "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 32 -m := 0 -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[m+size-1:m] := a[i+31:i] - m := m + size - FI -ENDFOR -dst[511:m] := src[511:m] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 32 -m := 0 -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[m+size-1:m] := a[i+31:i] - m := m + size - FI -ENDFOR -dst[511:m] := 0 -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 64 -m := 0 -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[m+size-1:m] := a[i+63:i] - m := m + size - FI -ENDFOR -dst[511:m] := src[511:m] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 64 -m := 0 -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[m+size-1:m] := a[i+63:i] - m := m + size - FI -ENDFOR -dst[511:m] := 0 -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - id := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := a[id+31:id] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - id := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := a[id+31:id] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - id := idx[i+3:i]*32 - dst[i+31:i] := a[id+31:id] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - off := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := idx[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - off := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - off := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := (idx[i+4]) ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - off := idx[i+3:i]*32 - dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] -ENDFOR -dst[MAX:512] := 0 - - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set) - -FOR j := 0 to 7 - i := j*64 - off := idx[i+2:i]*64 - IF k[j] - dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := idx[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
 - - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). -
-FOR j := 0 to 7
- i := j*64
- off := idx[i+2:i]*64
- IF k[j]
- dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off]
- ELSE
- dst[i+63:i] := idx[i+63:i]
- FI
-ENDFOR
-dst[MAX:512] := 0
- -
- AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - off := idx[i+2:i]*64 - IF k[j] - dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - off := idx[i+2:i]*64 - IF k[j] - dst[i+63:i] := (idx[i+3]) ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - off := idx[i+2:i]*64 - dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] -ENDFOR -dst[MAX:512] := 0 - - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - off := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := idx[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - off := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - off := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := (idx[i+4]) ? b[off+31:off] : a[off+31:off] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - off := idx[i+3:i]*32 - dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off] -ENDFOR -dst[MAX:512] := 0 - - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - off := idx[i+2:i]*64 - IF k[j] - dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := idx[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - off := idx[i+2:i]*64 - IF k[j] - dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - off := idx[i+2:i]*64 - IF k[j] - dst[i+63:i] := (idx[i+3]) ? b[off+63:off] : a[off+63:off] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - off := idx[i+2:i]*64 - dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off] -ENDFOR -dst[MAX:512] := 0 - - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI -IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI -IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI -IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI -IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI -IF (imm8[4] == 0) tmp_dst[319:256] := a[319:256]; FI -IF (imm8[4] == 1) tmp_dst[319:256] := a[383:320]; FI -IF (imm8[5] == 0) tmp_dst[383:320] := a[319:256]; FI -IF (imm8[5] == 1) tmp_dst[383:320] := a[383:320]; FI -IF (imm8[6] == 0) tmp_dst[447:384] := a[447:384]; FI -IF (imm8[6] == 1) tmp_dst[447:384] := a[511:448]; FI -IF (imm8[7] == 0) tmp_dst[511:448] := a[447:384]; FI -IF (imm8[7] == 1) tmp_dst[511:448] := a[511:448]; FI -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI -IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI -IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI -IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI -IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI -IF (b[257] == 0) tmp_dst[319:256] := a[319:256]; FI -IF (b[257] == 1) tmp_dst[319:256] := a[383:320]; FI -IF (b[321] == 0) tmp_dst[383:320] := a[319:256]; FI -IF (b[321] == 1) tmp_dst[383:320] := a[383:320]; FI -IF (b[385] == 0) tmp_dst[447:384] := a[447:384]; FI -IF (b[385] == 1) tmp_dst[447:384] := a[511:448]; FI -IF (b[449] == 0) tmp_dst[511:448] := a[447:384]; FI -IF (b[449] == 1) tmp_dst[511:448] := a[511:448]; FI -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI -IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI -IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI -IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI -IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI -IF (imm8[4] == 0) tmp_dst[319:256] := a[319:256]; FI -IF (imm8[4] == 1) tmp_dst[319:256] := a[383:320]; FI -IF (imm8[5] == 0) tmp_dst[383:320] := a[319:256]; FI -IF (imm8[5] == 1) tmp_dst[383:320] := a[383:320]; FI -IF (imm8[6] == 0) tmp_dst[447:384] := a[447:384]; FI -IF (imm8[6] == 1) tmp_dst[447:384] := a[511:448]; FI -IF (imm8[7] == 0) tmp_dst[511:448] := a[447:384]; FI -IF (imm8[7] == 1) tmp_dst[511:448] := a[511:448]; FI -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI -IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI -IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI -IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI -IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI -IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI -IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI -IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI -IF (b[257] == 0) tmp_dst[319:256] := a[319:256]; FI -IF (b[257] == 1) tmp_dst[319:256] := a[383:320]; FI -IF (b[321] == 0) tmp_dst[383:320] := a[319:256]; FI -IF (b[321] == 1) tmp_dst[383:320] := a[383:320]; FI -IF (b[385] == 0) tmp_dst[447:384] := a[447:384]; FI -IF (b[385] == 1) tmp_dst[447:384] := a[511:448]; FI -IF (b[449] == 0) tmp_dst[511:448] := a[447:384]; FI -IF (b[449] == 1) tmp_dst[511:448] := a[511:448]; FI -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". - -IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI -IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI -IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI -IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI -IF (imm8[2] == 0) dst[191:128] := a[191:128]; FI -IF (imm8[2] == 1) dst[191:128] := a[255:192]; FI -IF (imm8[3] == 0) dst[255:192] := a[191:128]; FI -IF (imm8[3] == 1) dst[255:192] := a[255:192]; FI -IF (imm8[4] == 0) dst[319:256] := a[319:256]; FI -IF (imm8[4] == 1) dst[319:256] := a[383:320]; FI -IF (imm8[5] == 0) dst[383:320] := a[319:256]; FI -IF (imm8[5] == 1) dst[383:320] := a[383:320]; FI -IF (imm8[6] == 0) dst[447:384] := a[447:384]; FI -IF (imm8[6] == 1) dst[447:384] := a[511:448]; FI -IF (imm8[7] == 0) dst[511:448] := a[447:384]; FI -IF (imm8[7] == 1) dst[511:448] := a[511:448]; FI -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". - -IF (b[1] == 0) dst[63:0] := a[63:0]; FI -IF (b[1] == 1) dst[63:0] := a[127:64]; FI -IF (b[65] == 0) dst[127:64] := a[63:0]; FI -IF (b[65] == 1) dst[127:64] := a[127:64]; FI -IF (b[129] == 0) dst[191:128] := a[191:128]; FI -IF (b[129] == 1) dst[191:128] := a[255:192]; FI -IF (b[193] == 0) dst[255:192] := a[191:128]; FI -IF (b[193] == 1) dst[255:192] := a[255:192]; FI -IF (b[257] == 0) dst[319:256] := a[319:256]; FI -IF (b[257] == 1) dst[319:256] := a[383:320]; FI -IF (b[321] == 0) dst[383:320] := a[319:256]; FI -IF (b[321] == 1) dst[383:320] := a[383:320]; FI -IF (b[385] == 0) dst[447:384] := a[447:384]; FI -IF (b[385] == 1) dst[447:384] := a[511:448]; FI -IF (b[449] == 0) dst[511:448] := a[447:384]; FI -IF (b[449] == 1) dst[511:448] := a[511:448]; FI -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) -tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) -tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) -tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) -tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) -tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) -tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) -tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) -tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) -tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) -tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) -tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) -tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) -tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) -tmp_dst[287:256] := SELECT4(a[383:256], b[257:256]) -tmp_dst[319:288] := SELECT4(a[383:256], b[289:288]) -tmp_dst[351:320] := SELECT4(a[383:256], b[321:320]) -tmp_dst[383:352] := SELECT4(a[383:256], b[353:352]) -tmp_dst[415:384] := SELECT4(a[511:384], b[385:384]) -tmp_dst[447:416] := SELECT4(a[511:384], b[417:416]) -tmp_dst[479:448] := SELECT4(a[511:384], b[449:448]) -tmp_dst[511:480] := SELECT4(a[511:384], b[481:480]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) -tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) -tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) -tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) -tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) -tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) -tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) -tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], b[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], b[33:32]) -tmp_dst[95:64] := SELECT4(a[127:0], b[65:64]) -tmp_dst[127:96] := SELECT4(a[127:0], b[97:96]) -tmp_dst[159:128] := SELECT4(a[255:128], b[129:128]) -tmp_dst[191:160] := SELECT4(a[255:128], b[161:160]) -tmp_dst[223:192] := SELECT4(a[255:128], b[193:192]) -tmp_dst[255:224] := SELECT4(a[255:128], b[225:224]) -tmp_dst[287:256] := SELECT4(a[383:256], b[257:256]) -tmp_dst[319:288] := SELECT4(a[383:256], b[289:288]) -tmp_dst[351:320] := SELECT4(a[383:256], b[321:320]) -tmp_dst[383:352] := SELECT4(a[383:256], b[353:352]) -tmp_dst[415:384] := SELECT4(a[511:384], b[385:384]) -tmp_dst[447:416] := SELECT4(a[511:384], b[417:416]) -tmp_dst[479:448] := SELECT4(a[511:384], b[449:448]) -tmp_dst[511:480] := SELECT4(a[511:384], b[481:480]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -dst[287:256] := SELECT4(a[383:256], imm8[1:0]) -dst[319:288] := SELECT4(a[383:256], imm8[3:2]) -dst[351:320] := SELECT4(a[383:256], imm8[5:4]) -dst[383:352] := SELECT4(a[383:256], imm8[7:6]) -dst[415:384] := SELECT4(a[511:384], imm8[1:0]) -dst[447:416] := SELECT4(a[511:384], imm8[3:2]) -dst[479:448] := SELECT4(a[511:384], imm8[5:4]) -dst[511:480] := SELECT4(a[511:384], imm8[7:6]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], b[1:0]) -dst[63:32] := SELECT4(a[127:0], b[33:32]) -dst[95:64] := SELECT4(a[127:0], b[65:64]) -dst[127:96] := SELECT4(a[127:0], b[97:96]) -dst[159:128] := SELECT4(a[255:128], b[129:128]) -dst[191:160] := SELECT4(a[255:128], b[161:160]) -dst[223:192] := SELECT4(a[255:128], b[193:192]) -dst[255:224] := SELECT4(a[255:128], b[225:224]) -dst[287:256] := SELECT4(a[383:256], b[257:256]) -dst[319:288] := SELECT4(a[383:256], b[289:288]) -dst[351:320] := SELECT4(a[383:256], b[321:320]) -dst[383:352] := SELECT4(a[383:256], b[353:352]) -dst[415:384] := SELECT4(a[511:384], b[385:384]) -dst[447:416] := SELECT4(a[511:384], b[417:416]) -dst[479:448] := SELECT4(a[511:384], b[449:448]) -dst[511:480] := SELECT4(a[511:384], b[481:480]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) -tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) -tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) -tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - id := idx[i+2:i]*64 - IF k[j] - dst[i+63:i] := a[id+63:id] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) -tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) -tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) -tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - id := idx[i+2:i]*64 - IF k[j] - dst[i+63:i] := a[id+63:id] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -dst[319:256] := SELECT4(a[511:256], imm8[1:0]) -dst[383:320] := SELECT4(a[511:256], imm8[3:2]) -dst[447:384] := SELECT4(a[511:256], imm8[5:4]) -dst[511:448] := SELECT4(a[511:256], imm8[7:6]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - id := idx[i+2:i]*64 - dst[i+63:i] := a[id+63:id] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - id := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := a[id+31:id] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - id := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := a[id+31:id] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
 - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". -
-FOR j := 0 to 15
- i := j*32
- id := idx[i+3:i]*32
- dst[i+31:i] := a[id+31:id]
-ENDFOR
-dst[MAX:512] := 0
- -
- AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) -tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) -tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) -tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - id := idx[i+2:i]*64 - IF k[j] - dst[i+63:i] := a[id+63:id] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0]) -tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2]) -tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4]) -tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - id := idx[i+2:i]*64 - IF k[j] - dst[i+63:i] := a[id+63:id] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[63:0] := src[63:0] - 1: tmp[63:0] := src[127:64] - 2: tmp[63:0] := src[191:128] - 3: tmp[63:0] := src[255:192] - ESAC - RETURN tmp[63:0] -} -dst[63:0] := SELECT4(a[255:0], imm8[1:0]) -dst[127:64] := SELECT4(a[255:0], imm8[3:2]) -dst[191:128] := SELECT4(a[255:0], imm8[5:4]) -dst[255:192] := SELECT4(a[255:0], imm8[7:6]) -dst[319:256] := SELECT4(a[511:256], imm8[1:0]) -dst[383:320] := SELECT4(a[511:256], imm8[3:2]) -dst[447:384] := SELECT4(a[511:256], imm8[5:4]) -dst[511:448] := SELECT4(a[511:256], imm8[7:6]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - id := idx[i+2:i]*64 - dst[i+63:i] := a[id+63:id] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[m+31:m] - m := m + 32 - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[m+31:m] - m := m + 32 - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[m+63:m] - m := m + 64 - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[m+63:m] - m := m + 64 - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) -tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) -tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) -tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) -tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) -tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) -tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) -tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[31:0] - dst[63:32] := src2[31:0] - dst[95:64] := src1[63:32] - dst[127:96] := src2[63:32] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[31:0] - dst[63:32] := src2[31:0] - dst[95:64] := src1[63:32] - dst[127:96] := src2[63:32] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[31:0] - dst[63:32] := src2[31:0] - dst[95:64] := src1[63:32] - dst[127:96] := src2[63:32] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[63:0] - dst[127:64] := src2[63:0] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[63:0] - dst[127:64] := src2[63:0] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[63:0] - dst[127:64] := src2[63:0] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - - Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
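The SELECT4 pseudocode above moves whole 128-bit lanes, two chosen from "a" and two from "b". A small C sketch of the unmasked form, assuming the standard AVX512F name _mm512_shuffle_f32x4 (the names themselves are stripped from these entries):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    float af[16], bf[16];
    for (int i = 0; i < 16; i++) { af[i] = (float)i; bf[i] = 100.0f + i; }
    __m512 a = _mm512_loadu_ps(af);
    __m512 b = _mm512_loadu_ps(bf);
    // imm8 = 0x88 = binary 10 00 10 00: dst lane0 = a lane0, lane1 = a lane2,
    // lane2 = b lane0, lane3 = b lane2 (gathers the even lanes of a and b).
    __m512 r = _mm512_shuffle_f32x4(a, b, 0x88);
    float out[16];
    _mm512_storeu_ps(out, r);
    for (int i = 0; i < 16; i++) printf("%g ", out[i]);
    printf("\n");
    return 0;
}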
- - - - - - - - Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - - Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - - Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[127:0] := src[127:0] - 1: tmp[127:0] := src[255:128] - 2: tmp[127:0] := src[383:256] - 3: tmp[127:0] := src[511:384] - ESAC - RETURN tmp[127:0] -} -dst[127:0] := SELECT4(a[511:0], imm8[1:0]) -dst[255:128] := SELECT4(a[511:0], imm8[3:2]) -dst[383:256] := SELECT4(b[511:0], imm8[5:4]) -dst[511:384] := SELECT4(b[511:0], imm8[7:6]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - - Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] -tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] -tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] -tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] -tmp_dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320] -tmp_dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320] -tmp_dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448] -tmp_dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448] -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] -tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] -tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] -tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] -tmp_dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320] -tmp_dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320] -tmp_dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448] -tmp_dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448] -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst". - -dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] -dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] -dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192] -dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192] -dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320] -dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320] -dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448] -dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
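Unlike the 128-bit lane shuffles above, this form consumes one imm8 bit per destination element, alternating between "a" and "b" within each 128-bit lane. A C sketch of the unmasked form, assuming the standard name _mm512_shuffle_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_setr_pd(0, 1, 2, 3, 4, 5, 6, 7);
    __m512d b = _mm512_setr_pd(10, 11, 12, 13, 14, 15, 16, 17);
    // imm8 = 0: every control bit selects the low element of its lane,
    // so dst = {a0,b0, a2,b2, a4,b4, a6,b6}.
    __m512d r = _mm512_shuffle_pd(a, b, 0x00);
    double out[8];
    _mm512_storeu_pd(out, r);
    for (int i = 0; i < 8; i++) printf("%g ", out[i]);
    printf("\n");
    return 0;
}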
- - - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). -
-DEFINE SELECT4(src, control) {
- CASE(control[1:0]) OF
- 0: tmp[31:0] := src[31:0]
- 1: tmp[31:0] := src[63:32]
- 2: tmp[31:0] := src[95:64]
- 3: tmp[31:0] := src[127:96]
- ESAC
- RETURN tmp[31:0]
-}
-tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
-tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
-tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4])
-tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6])
-tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
-tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
-tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4])
-tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6])
-tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0])
-tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2])
-tmp_dst[351:320] := SELECT4(b[383:256], imm8[5:4])
-tmp_dst[383:352] := SELECT4(b[383:256], imm8[7:6])
-tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0])
-tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2])
-tmp_dst[479:448] := SELECT4(b[511:384], imm8[5:4])
-tmp_dst[511:480] := SELECT4(b[511:384], imm8[7:6])
-FOR j := 0 to 15
- i := j*32
- IF k[j]
- dst[i+31:i] := tmp_dst[i+31:i]
- ELSE
- dst[i+31:i] := src[i+31:i]
- FI
-ENDFOR
-dst[MAX:512] := 0
-
-
- AVX512F
-
immintrin.h
- Swizzle -
- - - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). -
-DEFINE SELECT4(src, control) {
- CASE(control[1:0]) OF
- 0: tmp[31:0] := src[31:0]
- 1: tmp[31:0] := src[63:32]
- 2: tmp[31:0] := src[95:64]
- 3: tmp[31:0] := src[127:96]
- ESAC
- RETURN tmp[31:0]
-}
-tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
-tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
-tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4])
-tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6])
-tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
-tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
-tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4])
-tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6])
-tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0])
-tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2])
-tmp_dst[351:320] := SELECT4(b[383:256], imm8[5:4])
-tmp_dst[383:352] := SELECT4(b[383:256], imm8[7:6])
-tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0])
-tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2])
-tmp_dst[479:448] := SELECT4(b[511:384], imm8[5:4])
-tmp_dst[511:480] := SELECT4(b[511:384], imm8[7:6])
-FOR j := 0 to 15
- i := j*32
- IF k[j]
- dst[i+31:i] := tmp_dst[i+31:i]
- ELSE
- dst[i+31:i] := 0
- FI
-ENDFOR
-dst[MAX:512] := 0
-
-
- AVX512F
-
immintrin.h
- Swizzle -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst". -
-DEFINE SELECT4(src, control) {
- CASE(control[1:0]) OF
- 0: tmp[31:0] := src[31:0]
- 1: tmp[31:0] := src[63:32]
- 2: tmp[31:0] := src[95:64]
- 3: tmp[31:0] := src[127:96]
- ESAC
- RETURN tmp[31:0]
-}
-dst[31:0] := SELECT4(a[127:0], imm8[1:0])
-dst[63:32] := SELECT4(a[127:0], imm8[3:2])
-dst[95:64] := SELECT4(b[127:0], imm8[5:4])
-dst[127:96] := SELECT4(b[127:0], imm8[7:6])
-dst[159:128] := SELECT4(a[255:128], imm8[1:0])
-dst[191:160] := SELECT4(a[255:128], imm8[3:2])
-dst[223:192] := SELECT4(b[255:128], imm8[5:4])
-dst[255:224] := SELECT4(b[255:128], imm8[7:6])
-dst[287:256] := SELECT4(a[383:256], imm8[1:0])
-dst[319:288] := SELECT4(a[383:256], imm8[3:2])
-dst[351:320] := SELECT4(b[383:256], imm8[5:4])
-dst[383:352] := SELECT4(b[383:256], imm8[7:6])
-dst[415:384] := SELECT4(a[511:384], imm8[1:0])
-dst[447:416] := SELECT4(a[511:384], imm8[3:2])
-dst[479:448] := SELECT4(b[511:384], imm8[5:4])
-dst[511:480] := SELECT4(b[511:384], imm8[7:6])
-dst[MAX:512] := 0
-
-
- AVX512F
-
immintrin.h
- Swizzle -
- - - - - - - Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
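Per the INTERLEAVE_HIGH_DWORDS pseudocode, each 128-bit lane of the result is {a2,b2,a3,b3} drawn from the matching lanes of the inputs. A C sketch of the unmasked single-precision form, assuming the standard name _mm512_unpackhi_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    float af[16], bf[16];
    for (int i = 0; i < 16; i++) { af[i] = (float)i; bf[i] = 100.0f + i; }
    __m512 a = _mm512_loadu_ps(af);
    __m512 b = _mm512_loadu_ps(bf);
    // Lane 0 of the result is {2,102, 3,103}, lane 1 is {6,106, 7,107}, ...
    __m512 hi = _mm512_unpackhi_ps(a, b);
    float out[16];
    _mm512_storeu_ps(out, hi);
    for (int i = 0; i < 16; i++) printf("%g ", out[i]);
    printf("\n");
    return 0;
}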
- - - - - - - Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[63:0] - dst[127:64] := src2[63:0] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[63:0] - dst[127:64] := src2[63:0] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp_dst[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[63:0] - dst[127:64] := src2[63:0] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[31:0] - dst[63:32] := src2[31:0] - dst[95:64] := src1[63:32] - dst[127:96] := src2[63:32] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[31:0] - dst[63:32] := src2[31:0] - dst[95:64] := src1[63:32] - dst[127:96] := src2[63:32] - RETURN dst[127:0] -} -tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) -tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) -tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) -tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[31:0] - dst[63:32] := src2[31:0] - dst[95:64] := src1[63:32] - dst[127:96] := src2[63:32] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) -dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128]) -dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256]) -dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 -k[MAX:1] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 -k[MAX:1] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -IF k1[0] - k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 -ELSE - k[0] := 0 -FI -k[MAX:1] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -IF k1[0] - k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0 -ELSE - k[0] := 0 -FI -k[MAX:1] := 0 - - - AVX512F -
immintrin.h
- Compare -
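The four entries above are the lower-element double-precision compares, with and without [sae_note] exception suppression and with and without a zeromask. A C sketch of the plain form, assuming the standard AVX512F name _mm_cmp_sd_mask and the _CMP_* predicate macros from the table above:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_sd(1.0);
    __m128d b = _mm_set_sd(2.0);
    // Predicate 17 in the table above: less-than, ordered, quiet.
    __mmask8 k = _mm_cmp_sd_mask(a, b, _CMP_LT_OQ);
    printf("%u\n", (unsigned)(k & 1));   // prints 1, since 1.0 < 2.0
    return 0;
}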
- - - - - - - Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 -k[MAX:1] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 -k[MAX:1] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - - Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -IF k1[0] - k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 -ELSE - k[0] := 0 -FI -k[MAX:1] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -IF k1[0] - k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0 -ELSE - k[0] := 0 -FI -k[MAX:1] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -RETURN ( a[63:0] OP b[63:0] ) ? 1 : 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -RETURN ( a[31:0] OP b[31:0] ) ? 1 : 0 - - - - AVX512F -
immintrin.h
- Compare -
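Unlike the mask-producing compares above, these two entries return a plain int. A C sketch, assuming the standard name _mm_comi_round_sd with the SAE control standing in for the [sae_note] placeholder:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_sd(1.0);
    __m128d b = _mm_set_sd(2.0);
    // Ordered less-than-or-equal, suppressing floating-point exceptions.
    int le = _mm_comi_round_sd(a, b, _CMP_LE_OQ, _MM_FROUND_NO_EXC);
    printf("%d\n", le);   // prints 1
    return 0;
}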
- - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). -
-FOR j := 0 to 15
- i := j*32
- IF k1[j]
- k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0
- ELSE
- k[j] := 0
- FI
-ENDFOR
-k[MAX:16] := 0
-
-
- AVX512F
-
immintrin.h
- Compare -
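As the pseudocode shows, the masked variant is just the unmasked compare ANDed with "k1", which makes chaining range checks cheap. A C sketch, assuming the standard names _mm512_cmplt_epi32_mask and _mm512_mask_cmplt_epi32_mask:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(1);
    __m512i b = _mm512_set1_epi32(2);
    __mmask16 k  = _mm512_cmplt_epi32_mask(a, b);              // all 16 bits set
    __mmask16 k2 = _mm512_mask_cmplt_epi32_mask(0x00ff, a, b); // low 8 bits only
    printf("%04x %04x\n", (unsigned)k, (unsigned)k2);          // ffff 00ff
    return 0;
}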
- - - - - - Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
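The named forms above (eq, ge, gt, le, lt, neq) are the generic imm8-driven compare specialized to one predicate each; the generic form takes a _MM_CMPINT_* constant. A C sketch, assuming the standard name _mm512_cmp_epi64_mask:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi64(3);
    __m512i b = _mm512_set1_epi64(5);
    // Equivalent to the dedicated less-than-or-equal entry above.
    __mmask8 k = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_LE);
    printf("%02x\n", (unsigned)k);   // prints ff
    return 0;
}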
- - - - - - Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] >= b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] > b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] <= b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] < b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
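The unsigned 64-bit entries mirror the signed ones above; only the interpretation of the bit patterns changes. A C sketch contrasting the two, assuming the standard names _mm512_cmpgt_epi64_mask and _mm512_cmpgt_epu64_mask:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi64(-1);   // 0xffffffffffffffff
    __m512i b = _mm512_set1_epi64(1);
    __mmask8 ks = _mm512_cmpgt_epi64_mask(a, b);   // signed: -1 > 1 is false
    __mmask8 ku = _mm512_cmpgt_epu64_mask(a, b);   // unsigned: max > 1 is true
    printf("%02x %02x\n", (unsigned)ks, (unsigned)ku);   // 00 ff
    return 0;
}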
- - - - Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - m := j*64 - dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - m := j*64 - IF k[j] - dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) - ELSE - dst[m+63:m] := src[m+63:m] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - m := j*64 - IF k[j] - dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) - ELSE - dst[m+63:m] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
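Every 32-bit integer is exactly representable as a double, so this widening conversion is always exact, which is why these entries carry no [round_note]. A C sketch of the unmasked form, assuming the standard name _mm512_cvtepi32_pd, which takes a 256-bit integer source:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_setr_epi32(-4, -3, -2, -1, 0, 1, 2, 3);
    __m512d d = _mm512_cvtepi32_pd(a);   // 8 x i32 -> 8 x f64, exact
    double out[8];
    _mm512_storeu_pd(out, d);
    for (int i = 0; i < 8; i++) printf("%g ", out[i]);
    printf("\n");
    return 0;
}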
- - - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
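Here the [round_note] matters: single precision carries only 24 significant bits, so integers beyond 2^24 must round. A C sketch of the explicit-rounding form, assuming the standard name _mm512_cvt_roundepi32_ps and the _MM_FROUND_* control macros:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    // 2^24 + 1 is not representable in binary32.
    __m512i a = _mm512_set1_epi32(16777217);
    __m512 r = _mm512_cvt_roundepi32_ps(
        a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    float out[16];
    _mm512_storeu_ps(out, r);
    printf("%.1f\n", out[0]);   // 16777216.0
    return 0;
}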
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - l := j*64 - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". [sae_note] - -FOR j := 0 to 15 - i := j*32 - m := j*16 - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - m := j*16 - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 15 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 15 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
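A minimal C sketch of the half-to-single widening described above; the 16 half-precision inputs travel as raw bits in a __m256i. Names assumed from the descriptions:

    #include <immintrin.h>

    /* Assumed names: _mm512_cvtph_ps / _mm512_mask_cvtph_ps. */
    __m512 ph_to_ps_sketch(__m256i half_bits, __m512 src, __mmask16 k)
    {
        __m512 all  = _mm512_cvtph_ps(half_bits);              /* widen all 16 lanes      */
        __m512 kept = _mm512_mask_cvtph_ps(src, k, half_bits); /* masked-off lanes <- src */
        (void)all;
        return kept;
    }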
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
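A minimal C sketch of the float-to-int32 conversion; the [round_note] variants take the rounding mode as an immediate instead of reading MXCSR. Names assumed from the descriptions:

    #include <immintrin.h>

    /* Assumed names: _mm512_cvtps_epi32 / _mm512_cvt_roundps_epi32. */
    __m512i ps_to_epi32_sketch(__m512 a)
    {
        __m512i cur = _mm512_cvtps_epi32(a); /* rounds per the current MXCSR mode */
        __m512i rne = _mm512_cvt_roundps_epi32(
            a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); /* explicit round-to-nearest */
        (void)cur;
        return rne;
    }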
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". [sae_note] - -FOR j := 0 to 7 - i := 64*j - k := 32*j - dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - i := 64*j - k := 32*j - dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := 64*j - l := 32*j - IF k[j] - dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 32*j - IF k[j] - dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := 64*j - l := 32*j - IF k[j] - dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 32*j - IF k[j] - dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
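A minimal C sketch of the float-to-double widening; every float is exactly representable as a double, so only the masked forms change the result. Names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm512_cvtps_pd / _mm512_mask_cvtps_pd. */
    __m512d ps_to_pd_sketch(__m256 a, __m512d src, __mmask8 k)
    {
        __m512d all  = _mm512_cvtps_pd(a);
        __m512d kept = _mm512_mask_cvtps_pd(src, k, a); /* masked-off lanes <- src */
        (void)all;
        return kept;
    }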
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". [round2_note] - -FOR j := 0 to 15 - i := 16*j - l := 32*j - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". [round2_note] - -FOR j := 0 to 15 - i := 16*j - l := 32*j - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round2_note] - -FOR j := 0 to 15 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round2_note] - -FOR j := 0 to 15 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round2_note] - -FOR j := 0 to 15 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round2_note] - -FOR j := 0 to 15 - i := 16*j - l := 32*j - IF k[j] - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
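A minimal C sketch of the float-to-half narrowing; note that even the unmasked form requires the rounding-mode immediate ([round2_note]). Names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm512_cvtps_ph / _mm512_mask_cvtps_ph. */
    __m256i ps_to_ph_sketch(__m512 a, __m256i src, __mmask16 k)
    {
        __m256i all  = _mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        __m256i kept = _mm512_mask_cvtps_ph(src, k, a,
                                            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        (void)all;
        return kept;
    }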
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". - [round_note] - -dst[31:0] := Convert_FP64_To_Int32(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". - [round_note] - -dst[63:0] := Convert_FP64_To_Int64(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". - [round_note] - -dst[31:0] := Convert_FP64_To_Int32(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". - [round_note] - -dst[63:0] := Convert_FP64_To_Int64(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". - -dst[31:0] := Convert_FP64_To_Int32(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". - -dst[63:0] := Convert_FP64_To_Int64(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
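A minimal C sketch of the scalar double-to-signed-integer conversions above; names assumed from the descriptions (the 64-bit forms exist only when compiling for a 64-bit target):

    #include <immintrin.h>

    /* Assumed names: _mm_cvtsd_i32, _mm_cvt_roundsd_i32, _mm_cvtsd_i64. */
    long long sd_to_int_sketch(__m128d a)
    {
        int i32 = _mm_cvtsd_i32(a);                /* current rounding mode       */
        int r32 = _mm_cvt_roundsd_i32(             /* explicit rounding immediate */
            a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        long long i64 = _mm_cvtsd_i64(a);          /* 64-bit targets only         */
        return i32 + r32 + i64;
    }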
- - - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := Convert_FP64_To_FP32(b[63:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst[31:0] := Convert_FP64_To_FP32(b[63:0]) -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := Convert_FP64_To_FP32(b[63:0]) -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst[31:0] := Convert_FP64_To_FP32(b[63:0]) -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := Convert_FP64_To_FP32(b[63:0]) -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". - [round_note] - -dst[31:0] := Convert_FP64_To_UInt32(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". - [round_note] - -dst[63:0] := Convert_FP64_To_UInt64(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". - -dst[31:0] := Convert_FP64_To_UInt32(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". - -dst[63:0] := Convert_FP64_To_UInt64(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
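A minimal C sketch of the scalar double-to-unsigned conversions; names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm_cvtsd_u32 / _mm_cvtsd_u64
       (the u64 form exists only on 64-bit targets). */
    unsigned long long sd_to_uint_sketch(__m128d a)
    {
        unsigned int       u32 = _mm_cvtsd_u32(a);
        unsigned long long u64 = _mm_cvtsd_u64(a);
        return u32 + u64;
    }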
- - - - - - Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - [round_note] - -dst[63:0] := Convert_Int64_To_FP64(b[63:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - [round_note] - -dst[63:0] := Convert_Int64_To_FP64(b[63:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the signed 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := Convert_Int32_To_FP64(b[31:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := Convert_Int64_To_FP64(b[63:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := Convert_Int32_To_FP32(b[31:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := Convert_Int64_To_FP32(b[63:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := Convert_Int32_To_FP32(b[31:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := Convert_Int64_To_FP32(b[63:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := Convert_Int32_To_FP32(b[31:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := Convert_Int64_To_FP32(b[63:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
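A minimal C sketch of the signed-integer-to-scalar-float insertions above: the converted value lands in the low lane and the remaining lanes carry over from "a". Names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm_cvti32_sd / _mm_cvti64_ss
       (the i64 form exists only on 64-bit targets). */
    __m128 int_to_scalar_sketch(__m128d ad, __m128 as, int i, long long q)
    {
        __m128d lo_d = _mm_cvti32_sd(ad, i); /* dst[63:0] = (double)i, upper lane from ad */
        __m128  lo_s = _mm_cvti64_ss(as, q); /* dst[31:0] = (float)q, upper lanes from as */
        (void)lo_d;
        return lo_s;
    }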
- - - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - [sae_note] - -dst[63:0] := Convert_FP32_To_FP64(b[31:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - [sae_note] - -IF k[0] - dst[63:0] := Convert_FP32_To_FP64(b[31:0]) -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := Convert_FP32_To_FP64(b[31:0]) -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - [sae_note] - -IF k[0] - dst[63:0] := Convert_FP32_To_FP64(b[31:0]) -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := Convert_FP32_To_FP64(b[31:0]) -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". - [round_note] - -dst[31:0] := Convert_FP32_To_Int32(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". - [round_note] - -dst[63:0] := Convert_FP32_To_Int64(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". - [round_note] - -dst[31:0] := Convert_FP32_To_Int32(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". - [round_note] - -dst[63:0] := Convert_FP32_To_Int64(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". - -dst[31:0] := Convert_FP32_To_Int32(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". - -dst[63:0] := Convert_FP32_To_Int64(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
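A minimal C sketch of the scalar float-to-signed-integer conversions; names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm_cvtss_i32, _mm_cvt_roundss_i32, _mm_cvtss_i64. */
    long long ss_to_int_sketch(__m128 a)
    {
        int i32 = _mm_cvtss_i32(a);                /* current rounding mode      */
        int r32 = _mm_cvt_roundss_i32(             /* explicit round-toward-zero */
            a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        long long i64 = _mm_cvtss_i64(a);          /* 64-bit targets only        */
        return i32 + r32 + i64;
    }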
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". - [round_note] - -dst[31:0] := Convert_FP32_To_UInt32(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". - [round_note] - -dst[63:0] := Convert_FP32_To_UInt64(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst". - -dst[31:0] := Convert_FP32_To_UInt32(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst". - -dst[63:0] := Convert_FP32_To_UInt64(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". [sae_note] - -FOR j := 0 to 7 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
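A minimal C sketch of the truncating double-to-int32 family: truncation always rounds toward zero, so there is no rounding-mode immediate, only the [sae_note] exception-suppression form. Names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm512_cvttpd_epi32 / _mm512_mask_cvttpd_epi32. */
    __m256i pd_to_epi32_trunc_sketch(__m512d a, __m256i src, __mmask8 k)
    {
        __m256i all  = _mm512_cvttpd_epi32(a);              /* round toward zero       */
        __m256i kept = _mm512_mask_cvttpd_epi32(src, k, a); /* masked-off lanes <- src */
        (void)all;
        return kept;
    }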
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". [sae_note] - -FOR j := 0 to 7 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 32*j - l := 64*j - IF k[j] - dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". [sae_note] - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". [sae_note] - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
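A minimal C sketch of the truncating float-to-unsigned-int32 family; names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm512_cvttps_epu32 / _mm512_mask_cvttps_epu32. */
    __m512i ps_to_epu32_trunc_sketch(__m512 a, __m512i src, __mmask16 k)
    {
        __m512i all  = _mm512_cvttps_epu32(a);              /* round toward zero       */
        __m512i kept = _mm512_mask_cvttps_epu32(src, k, a); /* masked-off lanes <- src */
        (void)all;
        return kept;
    }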
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". - -dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". - -dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[31:0] := Convert_FP64_To_UInt32_Truncate(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[63:0] := Convert_FP64_To_UInt64_Truncate(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". - -dst[31:0] := Convert_FP64_To_UInt32_Truncate(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". - -dst[63:0] := Convert_FP64_To_UInt64_Truncate(a[63:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". - -dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". - -dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[31:0] := Convert_FP32_To_UInt32_Truncate(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". - [sae_note] - -dst[63:0] := Convert_FP32_To_UInt64_Truncate(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst". - -dst[31:0] := Convert_FP32_To_UInt32_Truncate(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst". - -dst[63:0] := Convert_FP32_To_UInt64_Truncate(a[31:0]) - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - l := j*32 - dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_UInt32_To_FP64(a[l+31:l]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[i+63:i] := Convert_Int64_To_FP64(a[l+31:l]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
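A minimal C sketch of the unsigned-int32-to-double widening above; every 32-bit unsigned value is exactly representable in a double, so the conversion is lossless. Names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm512_cvtepu32_pd / _mm512_mask_cvtepu32_pd. */
    __m512d epu32_to_pd_sketch(__m256i a, __m512d src, __mmask8 k)
    {
        __m512d all  = _mm512_cvtepu32_pd(a);
        __m512d kept = _mm512_mask_cvtepu32_pd(src, k, a); /* masked-off lanes <- src */
        (void)all;
        return kept;
    }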
- - - - - Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_UInt32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := Convert_UInt32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_UInt32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - IF k[j] - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert the unsigned 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - [round_note] - -dst[63:0] := Convert_UInt64_To_FP64(b[63:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the unsigned 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := Convert_Int32_To_FP64(b[31:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the unsigned 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := Convert_Int64_To_FP64(b[63:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert the unsigned 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := Convert_UInt32_To_FP32(b[31:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert the unsigned 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := Convert_UInt64_To_FP32(b[63:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the unsigned 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := Convert_Int32_To_FP32(b[31:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert the unsigned 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := Convert_Int64_To_FP32(b[63:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
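A minimal C sketch of the unsigned scalar insertions; names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm_cvtu32_sd / _mm_cvtu32_ss
       (the u64 forms exist only on 64-bit targets). */
    __m128 uint_to_scalar_sketch(__m128d ad, __m128 as, unsigned int u)
    {
        __m128d lo_d = _mm_cvtu32_sd(ad, u); /* dst[63:0] = (double)u */
        __m128  lo_s = _mm_cvtu32_ss(as, u); /* dst[31:0] = (float)u  */
        (void)lo_d;
        return lo_s;
    }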
- - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - k := 8*j - dst[k+7:k] := Truncate8(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+31:i]) - ELSE - dst[l+7:l] := src[l+7:l] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 15 - i := 32*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i]) - FI -ENDFOR - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+31:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
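A minimal C sketch of the truncating 32-to-8-bit downconversion, including the store form that writes only the active bytes to (possibly unaligned) memory. Names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm512_cvtepi32_epi8 / _mm512_mask_cvtepi32_storeu_epi8. */
    void epi32_to_epi8_sketch(__m512i a, __mmask16 k, unsigned char *out)
    {
        __m128i low_bytes = _mm512_cvtepi32_epi8(a);  /* keeps the low byte of each lane */
        _mm512_mask_cvtepi32_storeu_epi8(out, k, a);  /* stores only bytes with k[j]==1  */
        (void)low_bytes;
    }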
- - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - k := 16*j - dst[k+15:k] := Truncate16(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - l := 16*j - IF k[j] - dst[l+15:l] := Truncate16(a[i+31:i]) - ELSE - dst[l+15:l] := src[l+15:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 15 - i := 32*j - l := 16*j - IF k[j] - MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i]) - FI -ENDFOR - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - l := 16*j - IF k[j] - dst[l+15:l] := Truncate16(a[i+31:i]) - ELSE - dst[l+15:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := 64*j - k := 8*j - dst[k+7:k] := Truncate8(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+63:i]) - ELSE - dst[l+7:l] := src[l+7:l] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 7 - i := 64*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i]) - FI -ENDFOR - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 8*j - IF k[j] - dst[l+7:l] := Truncate8(a[i+63:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := 64*j - k := 32*j - dst[k+31:k] := Truncate32(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 32*j - IF k[j] - dst[l+31:l] := Truncate32(a[i+63:i]) - ELSE - dst[l+31:l] := src[l+31:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 7 - i := 64*j - l := 32*j - IF k[j] - MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i]) - FI -ENDFOR - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 32*j - IF k[j] - dst[l+31:l] := Truncate32(a[i+63:i]) - ELSE - dst[l+31:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 7 - i := 64*j - k := 16*j - dst[k+15:k] := Truncate16(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 16*j - IF k[j] - dst[l+15:l] := Truncate16(a[i+63:i]) - ELSE - dst[l+15:l] := src[l+15:l] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - Store - - - - - Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 7 - i := 64*j - l := 16*j - IF k[j] - MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i]) - FI -ENDFOR - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 16*j - IF k[j] - dst[l+15:l] := Truncate16(a[i+63:i]) - ELSE - dst[l+15:l] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - k := 8*j - dst[k+7:k] := Saturate8(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - l := 8*j - IF k[j] - dst[l+7:l] := Saturate8(a[i+31:i]) - ELSE - dst[l+7:l] := src[l+7:l] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - Store - - - - - Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 15 - i := 32*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i]) - FI -ENDFOR - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - l := 8*j - IF k[j] - dst[l+7:l] := Saturate8(a[i+31:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". - -FOR j := 0 to 15 - i := 32*j - k := 16*j - dst[k+15:k] := Saturate16(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - l := 16*j - IF k[j] - dst[l+15:l] := Saturate16(a[i+31:i]) - ELSE - dst[l+15:l] := src[l+15:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - Store - - - - - Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 15 - i := 32*j - l := 16*j - IF k[j] - MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i]) - FI -ENDFOR - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := 32*j - l := 16*j - IF k[j] - dst[l+15:l] := Saturate16(a[i+31:i]) - ELSE - dst[l+15:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
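A minimal C sketch contrasting the saturating downconversions above with the truncating ones earlier: out-of-range inputs clamp to the target type's extremes instead of keeping only the low bits. Names assumed:

    #include <immintrin.h>

    /* Assumed names: _mm512_cvtsepi32_epi8 / _mm512_cvtsepi32_epi16. */
    __m256i sat_downconvert_sketch(__m512i a)
    {
        __m128i s8  = _mm512_cvtsepi32_epi8(a);  /* clamps to [-128, 127]     */
        __m256i s16 = _mm512_cvtsepi32_epi16(a); /* clamps to [-32768, 32767] */
        (void)s8;
        return s16;
    }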
- - - - Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := 64*j - k := 8*j - dst[k+7:k] := Saturate8(a[i+63:i]) -ENDFOR -dst[MAX:64] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 8*j - IF k[j] - dst[l+7:l] := Saturate8(a[i+63:i]) - ELSE - dst[l+7:l] := src[l+7:l] - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - Store - - - - - Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 7 - i := 64*j - l := 8*j - IF k[j] - MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i]) - FI -ENDFOR - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 8*j - IF k[j] - dst[l+7:l] := Saturate8(a[i+63:i]) - ELSE - dst[l+7:l] := 0 - FI -ENDFOR -dst[MAX:64] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := 64*j - k := 32*j - dst[k+31:k] := Saturate32(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 32*j - IF k[j] - dst[l+31:l] := Saturate32(a[i+63:i]) - ELSE - dst[l+31:l] := src[l+31:l] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - Store - - - - - Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 7 - i := 64*j - l := 32*j - IF k[j] - MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i]) - FI -ENDFOR - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 32*j - IF k[j] - dst[l+31:l] := Saturate32(a[i+63:i]) - ELSE - dst[l+31:l] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := 64*j - k := 16*j - dst[k+15:k] := Saturate16(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 16*j - IF k[j] - dst[l+15:l] := Saturate16(a[i+63:i]) - ELSE - dst[l+15:l] := src[l+15:l] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - Store - - - - - Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -FOR j := 0 to 7 - i := 64*j - l := 16*j - IF k[j] - MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i]) - FI -ENDFOR - - - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := 64*j - l := 16*j - IF k[j] - dst[l+15:l] := Saturate16(a[i+63:i]) - ELSE - dst[l+15:l] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Convert -
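A minimal C sketch of the 32-bit to 8-bit signed-saturation case; the intrinsic names used here (_mm512_cvtsepi32_epi8 and its masked-store form _mm512_mask_cvtsepi32_storeu_epi8) are the immintrin.h names for these operations. Compile with -mavx512f and run on AVX-512F hardware.

    #include <immintrin.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        /* 16 signed 32-bit lanes; values outside [-128, 127] saturate. */
        __m512i a = _mm512_setr_epi32(-1000, -129, -128, -1, 0, 1, 126, 127,
                                      128, 300, 40000, -40000, 5, -5, 99, -99);

        /* Unmasked form: dst[k+7:k] := Saturate8(a[i+31:i]) for j = 0..15. */
        __m128i narrow = _mm512_cvtsepi32_epi8(a);

        int8_t out[16];
        _mm_storeu_si128((__m128i *)out, narrow);
        printf("%d %d %d %d\n", out[0], out[1], out[9], out[11]); /* -128 -128 127 -128 */

        /* Masked store form: only lanes whose mask bit is set are written. */
        int8_t partial[16] = {0};
        _mm512_mask_cvtsepi32_storeu_epi8(partial, (__mmask16)0x00FF, a);
        return 0;
    }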
[AVX512F] immintrin.h, category Convert: packed sign extensions. Each operation below exists in three variants: unmasked; with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set); and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The unmasked operation is shown for each; a usage sketch follows the group.

Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst".

FOR j := 0 to 15
    i := 32*j
    k := 8*j
    dst[i+31:i] := SignExtend32(a[k+7:k])
ENDFOR
dst[MAX:512] := 0

Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst".

FOR j := 0 to 7
    i := 64*j
    k := 8*j
    dst[i+63:i] := SignExtend64(a[k+7:k])
ENDFOR
dst[MAX:512] := 0

Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst".

FOR j := 0 to 7
    i := 64*j
    k := 32*j
    dst[i+63:i] := SignExtend64(a[k+31:k])
ENDFOR
dst[MAX:512] := 0

Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst".

FOR j := 0 to 15
    i := 32*j
    k := 16*j
    dst[i+31:i] := SignExtend32(a[k+15:k])
ENDFOR
dst[MAX:512] := 0

Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst".

FOR j := 0 to 7
    i := 64*j
    k := 16*j
    dst[i+63:i] := SignExtend64(a[k+15:k])
ENDFOR
dst[MAX:512] := 0
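A minimal C sketch of the 8-bit to 32-bit sign extension, using the immintrin.h names _mm512_cvtepi8_epi32 and its zeromask form _mm512_maskz_cvtepi8_epi32 (compile with -mavx512f):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        /* 16 signed bytes in a 128-bit source register. */
        __m128i bytes = _mm_setr_epi8(-128, -2, -1, 0, 1, 2, 127, -7,
                                      8, -9, 10, -11, 12, -13, 14, -15);

        /* dst[i+31:i] := SignExtend32(a[k+7:k]) for j = 0..15. */
        __m512i wide = _mm512_cvtepi8_epi32(bytes);

        int out[16];
        _mm512_storeu_si512(out, wide);
        printf("%d %d %d\n", out[0], out[6], out[15]); /* -128 127 -15 */

        /* Zeromask form: lanes 8..15 have clear mask bits and become 0. */
        __m512i low_half = _mm512_maskz_cvtepi8_epi32((__mmask16)0x00FF, bytes);
        (void)low_half;
        return 0;
    }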
[AVX512F] immintrin.h, category Convert: packed unsigned saturating down-conversions. Each operation below exists in four variants: unmasked; with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set); with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); and as a masked store, where the active results (those with their respective bit set in writemask "k") are stored to unaligned memory at "base_addr". The unmasked operation is shown for each; a usage sketch follows the group.

Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".

FOR j := 0 to 15
    i := 32*j
    k := 8*j
    dst[k+7:k] := SaturateU8(a[i+31:i])
ENDFOR
dst[MAX:128] := 0

Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst".

FOR j := 0 to 15
    i := 32*j
    k := 16*j
    dst[k+15:k] := SaturateU16(a[i+31:i])
ENDFOR
dst[MAX:256] := 0

Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".

FOR j := 0 to 7
    i := 64*j
    k := 8*j
    dst[k+7:k] := SaturateU8(a[i+63:i])
ENDFOR
dst[MAX:64] := 0

Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst".

FOR j := 0 to 7
    i := 64*j
    k := 32*j
    dst[k+31:k] := SaturateU32(a[i+63:i])
ENDFOR
dst[MAX:256] := 0

Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst".

FOR j := 0 to 7
    i := 64*j
    k := 16*j
    dst[k+15:k] := SaturateU16(a[i+63:i])
ENDFOR
dst[MAX:128] := 0
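A minimal C sketch of the unsigned 32-bit to 8-bit saturating case, using the immintrin.h name _mm512_cvtusepi32_epi8:

    #include <immintrin.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        /* Lanes are treated as unsigned; anything above 255 clamps to 255. */
        __m512i a = _mm512_setr_epi32(0, 1, 200, 255, 256, 300, 70000, 12,
                                      13, 14, 15, 16, 17, 18, 19, 20);

        __m128i narrow = _mm512_cvtusepi32_epi8(a); /* SaturateU8 per lane */

        uint8_t out[16];
        _mm_storeu_si128((__m128i *)out, narrow);
        printf("%u %u %u %u\n", out[3], out[4], out[5], out[6]); /* 255 255 255 255 */
        return 0;
    }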
[AVX512F] immintrin.h, category Convert: packed zero extensions. Each operation below exists in three variants: unmasked; with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set); and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The unmasked operation is shown for each; a usage sketch follows the group.

Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst".

FOR j := 0 to 15
    i := 32*j
    k := 8*j
    dst[i+31:i] := ZeroExtend32(a[k+7:k])
ENDFOR
dst[MAX:512] := 0

Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst".

FOR j := 0 to 7
    i := 64*j
    k := 8*j
    dst[i+63:i] := ZeroExtend64(a[k+7:k])
ENDFOR
dst[MAX:512] := 0

Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst".

FOR j := 0 to 7
    i := 64*j
    k := 32*j
    dst[i+63:i] := ZeroExtend64(a[k+31:k])
ENDFOR
dst[MAX:512] := 0

Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst".

FOR j := 0 to 15
    i := 32*j
    k := 16*j
    dst[i+31:i] := ZeroExtend32(a[k+15:k])
ENDFOR
dst[MAX:512] := 0

Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst".

FOR j := 0 to 7
    i := 64*j
    k := 16*j
    dst[i+63:i] := ZeroExtend64(a[k+15:k])
ENDFOR
dst[MAX:512] := 0
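A minimal C sketch of the 8-bit to 32-bit zero extension, using the immintrin.h name _mm512_cvtepu8_epi32; note how 0xFF widens to 255 rather than -1:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i bytes = _mm_set1_epi8((char)0xFF); /* all byte lanes 0xFF */

        /* dst[i+31:i] := ZeroExtend32(a[k+7:k]) for j = 0..15. */
        __m512i wide = _mm512_cvtepu8_epi32(bytes);

        int out[16];
        _mm512_storeu_si512(out, wide);
        printf("%d\n", out[0]); /* 255 (sign extension would give -1) */
        return 0;
    }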
[AVX512F] immintrin.h, category Convert: scalar extractions.

Copy the lower single-precision (32-bit) floating-point element of "a" to "dst".
dst[31:0] := a[31:0]

Copy the lower double-precision (64-bit) floating-point element of "a" to "dst".
dst[63:0] := a[63:0]

Copy the lower 32-bit integer in "a" to "dst".
dst[31:0] := a[31:0]
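A minimal C sketch of the three scalar extractions, using the immintrin.h names _mm512_cvtss_f32, _mm512_cvtsd_f64, and _mm512_cvtsi512_si32:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512  ps = _mm512_set1_ps(1.5f);
        __m512d pd = _mm512_set1_pd(2.5);
        __m512i pi = _mm512_set1_epi32(42);

        /* Each copies only the lowest element; no conversion is performed. */
        float  f = _mm512_cvtss_f32(ps);     /* 1.5f */
        double d = _mm512_cvtsd_f64(pd);     /* 2.5  */
        int    i = _mm512_cvtsi512_si32(pi); /* 42   */

        printf("%f %f %d\n", f, d, i);
        return 0;
    }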
[AVX512F] immintrin.h, category Special Math Functions: floating-point maximum. [max_float_note] applies to all forms. The packed operations below exist as unmasked, writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and zeromask "k" (elements are zeroed out when the corresponding mask bit is not set) variants, each additionally available in a [sae_note] form. The unmasked operation is shown for each; a usage sketch follows the group.

Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".

FOR j := 0 to 7
    i := j*64
    dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
ENDFOR
dst[MAX:512] := 0

Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".

FOR j := 0 to 15
    i := j*32
    dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
ENDFOR
dst[MAX:512] := 0

The scalar forms compare only the lower elements and copy the upper element(s) from "a". They exist as writemask and zeromask variants (each with and without [sae_note]; in the masked forms, the lower element of "dst" is taken from "src" or zeroed when mask bit 0 is not set) and as an unmasked [sae_note] variant.

Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".

dst[63:0] := MAX(a[63:0], b[63:0])
dst[127:64] := a[127:64]
dst[MAX:128] := 0

Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".

dst[31:0] := MAX(a[31:0], b[31:0])
dst[127:32] := a[127:32]
dst[MAX:128] := 0
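A minimal C sketch of the packed maximum, using the immintrin.h names _mm512_max_pd and _mm512_mask_max_pd:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512d a = _mm512_set1_pd(1.0);
        __m512d b = _mm512_set1_pd(2.0);

        /* dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) for j = 0..7. */
        __m512d m = _mm512_max_pd(a, b);

        /* Writemask form: lanes 4..7 have clear mask bits, so they are
           copied from the "src" operand (here: a) instead. */
        __m512d mm = _mm512_mask_max_pd(a, (__mmask8)0x0F, a, b);

        double out[8], outm[8];
        _mm512_storeu_pd(out, m);
        _mm512_storeu_pd(outm, mm);
        printf("%g %g %g\n", out[0], outm[0], outm[7]); /* 2 2 1 */
        return 0;
    }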
[AVX512F] immintrin.h, category Special Math Functions: floating-point minimum. [min_float_note] applies to all forms. The same variant structure as the maximum operations above applies: packed forms come as unmasked, writemask "k" (elements copied from "src" when the corresponding mask bit is not set), and zeromask "k" (elements zeroed out when the corresponding mask bit is not set) variants, each with and without [sae_note]. The unmasked operation is shown for each; a usage sketch follows the group.

Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".

FOR j := 0 to 7
    i := j*64
    dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
ENDFOR
dst[MAX:512] := 0

Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".

FOR j := 0 to 15
    i := j*32
    dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
ENDFOR
dst[MAX:512] := 0

The scalar forms compare only the lower elements and copy the upper element(s) from "a". They exist as writemask and zeromask variants (each with and without [sae_note]; in the masked forms, the lower element of "dst" is taken from "src" or zeroed when mask bit 0 is not set) and as an unmasked [sae_note] variant.

Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".

dst[63:0] := MIN(a[63:0], b[63:0])
dst[127:64] := a[127:64]
dst[MAX:128] := 0

Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".

dst[31:0] := MIN(a[31:0], b[31:0])
dst[127:32] := a[127:32]
dst[MAX:128] := 0
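The minimum forms mirror the maximum ones; a short C sketch using the immintrin.h names _mm512_min_ps and the zeromask form _mm512_maskz_min_ps:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512 a = _mm512_set1_ps(1.0f);
        __m512 b = _mm512_set1_ps(-3.0f);

        __m512 m = _mm512_min_ps(a, b); /* every lane: MIN(1, -3) = -3 */

        /* Zeromask form: lanes 8..15 have clear mask bits and become 0.0f. */
        __m512 mz = _mm512_maskz_min_ps((__mmask16)0x00FF, a, b);

        float out[16];
        _mm512_storeu_ps(out, mz);
        printf("%g %g %g\n", out[0], out[7], out[8]); /* -3 -3 0 */
        (void)m;
        return 0;
    }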
[AVX512F] immintrin.h, category Special Math Functions: integer absolute value. Each operation below exists as unmasked, writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and zeromask "k" (elements are zeroed out when the corresponding mask bit is not set) variants. The unmasked operation is shown for each; a usage sketch follows the group.

Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst".

FOR j := 0 to 15
    i := j*32
    dst[i+31:i] := ABS(a[i+31:i])
ENDFOR
dst[MAX:512] := 0

Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst".

FOR j := 0 to 7
    i := j*64
    dst[i+63:i] := ABS(a[i+63:i])
ENDFOR
dst[MAX:512] := 0
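A minimal C sketch of the packed absolute value, using the immintrin.h name _mm512_abs_epi32:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512i a = _mm512_set1_epi32(-7);

        /* dst[i+31:i] := ABS(a[i+31:i]); the result is interpreted as
           unsigned (ABS(INT_MIN) is 2^31, representable only as unsigned). */
        __m512i r = _mm512_abs_epi32(a);

        unsigned out[16];
        _mm512_storeu_si512(out, r);
        printf("%u\n", out[0]); /* 7 */
        return 0;
    }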
[AVX512F] immintrin.h, category Special Math Functions: packed integer maximum and minimum. In this group, the 32-bit operations appear in their zeromask "k" form (elements are zeroed out when the corresponding mask bit is not set), while the 64-bit operations appear as unmasked, writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and zeromask variants. A usage sketch follows the group.

Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k".

FOR j := 0 to 15
    i := j*32
    IF k[j]
        dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
    ELSE
        dst[i+31:i] := 0
    FI
ENDFOR
dst[MAX:512] := 0

Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst".

FOR j := 0 to 7
    i := j*64
    dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
ENDFOR
dst[MAX:512] := 0

The same operations are defined for unsigned comparison: maximum of packed unsigned 32-bit integers (zeromask form) and of packed unsigned 64-bit integers (unmasked, writemask, and zeromask forms). Replacing MAX with MIN in each loop gives the corresponding minimum operations, with the same variants: signed 32-bit (zeromask), signed 64-bit (unmasked, writemask, zeromask), unsigned 32-bit (zeromask), and unsigned 64-bit (unmasked, writemask, zeromask).
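The 64-bit integer min/max operations are new with AVX-512F (earlier SSE/AVX generations provided only 8/16/32-bit element widths). A short C sketch using the immintrin.h name _mm512_max_epi64:

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m512i a = _mm512_set1_epi64(-5);
        __m512i b = _mm512_set1_epi64(3);

        /* dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]), signed comparison. */
        __m512i m = _mm512_max_epi64(a, b);

        long long out[8];
        _mm512_storeu_si512(out, m);
        printf("%lld\n", out[0]); /* 3 */
        return 0;
    }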
- - - - - Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - - Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[63:0] := a[63:0] -tmp[127:64] := a[63:0] -tmp[191:128] := a[191:128] -tmp[255:192] := a[191:128] -tmp[319:256] := a[319:256] -tmp[383:320] := a[319:256] -tmp[447:384] := a[447:384] -tmp[511:448] := a[447:384] -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[63:0] := a[63:0] -tmp[127:64] := a[63:0] -tmp[191:128] := a[191:128] -tmp[255:192] := a[191:128] -tmp[319:256] := a[319:256] -tmp[383:320] := a[319:256] -tmp[447:384] := a[447:384] -tmp[511:448] := a[447:384] -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := tmp[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst". - -dst[63:0] := a[63:0] -dst[127:64] := a[63:0] -dst[191:128] := a[191:128] -dst[255:192] := a[191:128] -dst[319:256] := a[319:256] -dst[383:320] := a[319:256] -dst[447:384] := a[447:384] -dst[511:448] := a[447:384] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - - - Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := b[63:0] -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - - Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := b[63:0] -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - - Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[31:0] := a[63:32] -tmp[63:32] := a[63:32] -tmp[95:64] := a[127:96] -tmp[127:96] := a[127:96] -tmp[159:128] := a[191:160] -tmp[191:160] := a[191:160] -tmp[223:192] := a[255:224] -tmp[255:224] := a[255:224] -tmp[287:256] := a[319:288] -tmp[319:288] := a[319:288] -tmp[351:320] := a[383:352] -tmp[383:352] := a[383:352] -tmp[415:384] := a[447:416] -tmp[447:416] := a[447:416] -tmp[479:448] := a[511:480] -tmp[511:480] := a[511:480] -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[31:0] := a[63:32] -tmp[63:32] := a[63:32] -tmp[95:64] := a[127:96] -tmp[127:96] := a[127:96] -tmp[159:128] := a[191:160] -tmp[191:160] := a[191:160] -tmp[223:192] := a[255:224] -tmp[255:224] := a[255:224] -tmp[287:256] := a[319:288] -tmp[319:288] := a[319:288] -tmp[351:320] := a[383:352] -tmp[383:352] := a[383:352] -tmp[415:384] := a[447:416] -tmp[447:416] := a[447:416] -tmp[479:448] := a[511:480] -tmp[511:480] := a[511:480] -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". - -dst[31:0] := a[63:32] -dst[63:32] := a[63:32] -dst[95:64] := a[127:96] -dst[127:96] := a[127:96] -dst[159:128] := a[191:160] -dst[191:160] := a[191:160] -dst[223:192] := a[255:224] -dst[255:224] := a[255:224] -dst[287:256] := a[319:288] -dst[319:288] := a[319:288] -dst[351:320] := a[383:352] -dst[383:352] := a[383:352] -dst[415:384] := a[447:416] -dst[447:416] := a[447:416] -dst[479:448] := a[511:480] -dst[511:480] := a[511:480] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - - Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -tmp[31:0] := a[31:0] -tmp[63:32] := a[31:0] -tmp[95:64] := a[95:64] -tmp[127:96] := a[95:64] -tmp[159:128] := a[159:128] -tmp[191:160] := a[159:128] -tmp[223:192] := a[223:192] -tmp[255:224] := a[223:192] -tmp[287:256] := a[287:256] -tmp[319:288] := a[287:256] -tmp[351:320] := a[351:320] -tmp[383:352] := a[351:320] -tmp[415:384] := a[415:384] -tmp[447:416] := a[415:384] -tmp[479:448] := a[479:448] -tmp[511:480] := a[479:448] -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -tmp[31:0] := a[31:0] -tmp[63:32] := a[31:0] -tmp[95:64] := a[95:64] -tmp[127:96] := a[95:64] -tmp[159:128] := a[159:128] -tmp[191:160] := a[159:128] -tmp[223:192] := a[223:192] -tmp[255:224] := a[223:192] -tmp[287:256] := a[287:256] -tmp[319:288] := a[287:256] -tmp[351:320] := a[351:320] -tmp[383:352] := a[351:320] -tmp[415:384] := a[415:384] -tmp[447:416] := a[415:384] -tmp[479:448] := a[479:448] -tmp[511:480] := a[479:448] -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". - -dst[31:0] := a[31:0] -dst[63:32] := a[31:0] -dst[95:64] := a[95:64] -dst[127:96] := a[95:64] -dst[159:128] := a[159:128] -dst[191:160] := a[159:128] -dst[223:192] := a[223:192] -dst[255:224] := a[223:192] -dst[287:256] := a[287:256] -dst[319:288] := a[287:256] -dst[351:320] := a[351:320] -dst[383:352] := a[351:320] -dst[415:384] := a[415:384] -dst[447:416] := a[415:384] -dst[479:448] := a[479:448] -dst[511:480] := a[479:448] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
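
The six duplicate-odd / duplicate-even entries above are the 512-bit forms of the SSE3 movshdup/movsldup pattern. This excerpt does not name the C intrinsics; assuming they correspond to the standard `_mm512_movehdup_ps` / `_mm512_moveldup_ps` (with `_mm512_mask_*` and `_mm512_maskz_*` variants), a minimal sketch of what the unmasked forms do:

```c
#include <immintrin.h>

/* movehdup: lanes 2j and 2j+1 both receive a[2j+1];
   moveldup: both receive a[2j]. A common use is complex
   arithmetic on interleaved re/im data. */
__m512 duplicate_imaginary(__m512 interleaved) {
    /* re0, im0, re1, im1, ... -> im0, im0, im1, im1, ... */
    return _mm512_movehdup_ps(interleaved);
}

__m512 duplicate_real(__m512 interleaved) {
    /* re0, im0, re1, im1, ... -> re0, re0, re1, re1, ... */
    return _mm512_moveldup_ps(interleaved);
}
```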
- - - - - - - Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := b[31:0] -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - - Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := b[31:0] -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Move -
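
The four scalar-move entries in this group (the double-precision pair at the top and the single-precision pair just above) all merge lane 0 of "b" into "a" under a one-bit mask. Assuming the usual `_mm_mask_move_sd` / `_mm_maskz_move_ss` names, which are not given in this excerpt, a sketch:

```c
#include <immintrin.h>

/* Lane 0 comes from b if bit 0 of k is set, otherwise from src
   (writemask form) or 0.0 (zeromask form); upper lanes come from a. */
__m128d select_low_lane(__m128d src, __mmask8 k, __m128d a, __m128d b) {
    return _mm_mask_move_sd(src, k, a, b);
}

__m128 select_low_lane_or_zero(__mmask8 k, __m128 a, __m128 b) {
    return _mm_maskz_move_ss(k, a, b);
}
```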
- - - - - - Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] AND b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] AND b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
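
The six zeromasked logical entries above (AND, ANDNOT, OR over 32- and 64-bit lanes) share one shape: compute the bitwise operation per lane, then zero every lane whose mask bit is clear. Assuming the conventional `_mm512_maskz_*` names (not stated in this excerpt), the 32-bit AND looks like:

```c
#include <immintrin.h>

/* Keep (a[j] & b[j]) only in lanes selected by k; others become 0. */
__m512i masked_and_lanes(__mmask16 k, __m512i a, __m512i b) {
    return _mm512_maskz_and_epi32(k, a, b);
}

/* Example: retain only the low four dwords of (a & b). */
__m512i low_four_and(__m512i a, __m512i b) {
    return _mm512_maskz_and_epi32((__mmask16)0x000F, a, b);
}
```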
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "a" when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 15 - i := j*32 - IF k[j] - FOR h := 0 to 31 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 15 - i := j*32 - IF k[j] - FOR h := 0 to 31 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 15 - i := j*32 - FOR h := 0 to 31 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "a" when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 7 - i := j*64 - IF k[j] - FOR h := 0 to 63 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 7 - i := j*64 - IF k[j] - FOR h := 0 to 63 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used according to "imm8", and the result is written to the corresponding bit in "dst". - -DEFINE TernaryOP(imm8, a, b, c) { - CASE imm8[7:0] OF - 0: dst[0] := 0 // imm8[7:0] := 0 - 1: dst[0] := NOT (a OR b OR c) // imm8[7:0] := NOT (_MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C) - // ... - 254: dst[0] := a OR b OR c // imm8[7:0] := _MM_TERNLOG_A OR _MM_TERNLOG_B OR _MM_TERNLOG_C - 255: dst[0] := 1 // imm8[7:0] := 1 - ESAC -} -imm8[7:0] = LogicExp(_MM_TERNLOG_A, _MM_TERNLOG_B, _MM_TERNLOG_C) -FOR j := 0 to 7 - i := j*64 - FOR h := 0 to 63 - dst[i+h] := TernaryOP(imm8[7:0], a[i+h], b[i+h], c[i+h]) - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
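
The `imm8` of the ternary-logic entries is simply the eight-entry truth table of the desired function: bit `(a<<2)|(b<<1)|c` of `imm8` is the output for that combination of input bits (consistent with the cases shown, e.g. `imm8 = 1` yields `NOT (a OR b OR c)`). For example, three-input majority is true for inputs 011, 101, 110, 111, i.e. bits 3, 5, 6 and 7, giving `imm8 = 0xE8`. Assuming the intrinsic behind these entries is `_mm512_ternarylogic_epi32` (the name is not given in this excerpt):

```c
#include <immintrin.h>

/* Per-bit majority: out = (a&b) | (a&c) | (b&c).
   Truth table is 1 at indices 3,5,6,7 -> imm8 = 0xE8.
   One instruction replaces three AND/OR operations. */
__m512i majority3(__m512i a, __m512i b, __m512i c) {
    return _mm512_ternarylogic_epi32(a, b, c, 0xE8);
}

/* Three-way XOR (odd parity): 1 at indices 1,2,4,7 -> imm8 = 0x96. */
__m512i xor3(__m512i a, __m512i b, __m512i c) {
    return _mm512_ternarylogic_epi32(a, b, c, 0x96);
}
```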
- - - - - - Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is non-zero. - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. - -FOR j := 0 to 7 - i := j*64 - k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise NAND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. - -FOR j := 0 to 15 - i := j*32 - k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k") if the intermediate value is zero. - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise NAND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero. - -FOR j := 0 to 7 - i := j*64 - k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Logical -
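
The test/testn entries compute a per-lane AND and reduce it directly to a mask register: `test` sets a mask bit when the AND is non-zero, `testn` when it is zero. Assuming the usual `_mm512_test_*_mask` / `_mm512_testn_*_mask` names, a sketch:

```c
#include <immintrin.h>

/* Mask of 64-bit lanes in which a and b share at least one set bit. */
__mmask8 lanes_overlap(__m512i a, __m512i b) {
    return _mm512_test_epi64_mask(a, b);
}

/* Mask of 32-bit lanes in which a and b are bitwise disjoint.
   Calling it with a == b doubles as a "lane is zero" test. */
__mmask16 lanes_disjoint(__m512i a, __m512i b) {
    return _mm512_testn_epi32_mask(a, b);
}
```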
- - - - - - Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - Broadcast 8-bit integer "a" to all elements of "dst". - -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := a[7:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - - - - Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - - - Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[31:0] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - - Broadcast 32-bit integer "a" to all elements of "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[31:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - - - - Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - - - Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[63:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - - Broadcast 64-bit integer "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - - Broadcast the low packed 16-bit integer from "a" to all elements of "dst". 
-
-FOR j := 0 to 31
-	i := j*16
-	dst[i+15:i] := a[15:0]
-ENDFOR
-dst[MAX:512] := 0
- - - AVX512F - 
immintrin.h
- Set -
- - - - Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[31:0] -ENDFOR -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
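
The broadcast entries splat a single scalar across all lanes and are the usual way to materialize a constant vector operand. Assuming the familiar `_mm512_set1_*` names (this excerpt names none of them), a typical use:

```c
#include <immintrin.h>

/* Add the same scalar to every element of a float array, 16 at a
   time. For brevity this sketch assumes n is a multiple of 16. */
void add_scalar(float *dst, const float *src, float s, int n) {
    __m512 vs = _mm512_set1_ps(s);            /* s in all 16 lanes */
    for (int i = 0; i < n; i += 16) {
        __m512 v = _mm512_loadu_ps(src + i);
        _mm512_storeu_ps(dst + i, _mm512_add_ps(v, vs));
    }
}
```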
- - - - - - - Set packed 32-bit integers in "dst" with the repeated 4 element sequence. - -dst[31:0] := a -dst[63:32] := b -dst[95:64] := c -dst[127:96] := d -dst[159:128] := a -dst[191:160] := b -dst[223:192] := c -dst[255:224] := d -dst[287:256] := a -dst[319:288] := b -dst[351:320] := c -dst[383:352] := d -dst[415:384] := a -dst[447:416] := b -dst[479:448] := c -dst[511:480] := d -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - Set packed 64-bit integers in "dst" with the repeated 4 element sequence. - -dst[63:0] := a -dst[127:64] := b -dst[191:128] := c -dst[255:192] := d -dst[319:256] := a -dst[383:320] := b -dst[447:384] := c -dst[511:448] := d -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - Set packed double-precision (64-bit) floating-point elements in "dst" with the repeated 4 element sequence. - -dst[63:0] := a -dst[127:64] := b -dst[191:128] := c -dst[255:192] := d -dst[319:256] := a -dst[383:320] := b -dst[447:384] := c -dst[511:448] := d -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - Set packed single-precision (32-bit) floating-point elements in "dst" with the repeated 4 element sequence. - -dst[31:0] := a -dst[63:32] := b -dst[95:64] := c -dst[127:96] := d -dst[159:128] := a -dst[191:160] := b -dst[223:192] := c -dst[255:224] := d -dst[287:256] := a -dst[319:288] := b -dst[351:320] := c -dst[383:352] := d -dst[415:384] := a -dst[447:416] := b -dst[479:448] := c -dst[511:480] := d -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set packed 8-bit integers in "dst" with the supplied values. - -dst[7:0] := e0 -dst[15:8] := e1 -dst[23:16] := e2 -dst[31:24] := e3 -dst[39:32] := e4 -dst[47:40] := e5 -dst[55:48] := e6 -dst[63:56] := e7 -dst[71:64] := e8 -dst[79:72] := e9 -dst[87:80] := e10 -dst[95:88] := e11 -dst[103:96] := e12 -dst[111:104] := e13 -dst[119:112] := e14 -dst[127:120] := e15 -dst[135:128] := e16 -dst[143:136] := e17 -dst[151:144] := e18 -dst[159:152] := e19 -dst[167:160] := e20 -dst[175:168] := e21 -dst[183:176] := e22 -dst[191:184] := e23 -dst[199:192] := e24 -dst[207:200] := e25 -dst[215:208] := e26 -dst[223:216] := e27 -dst[231:224] := e28 -dst[239:232] := e29 -dst[247:240] := e30 -dst[255:248] := e31 -dst[263:256] := e32 -dst[271:264] := e33 -dst[279:272] := e34 -dst[287:280] := e35 -dst[295:288] := e36 -dst[303:296] := e37 -dst[311:304] := e38 -dst[319:312] := e39 -dst[327:320] := e40 -dst[335:328] := e41 -dst[343:336] := e42 -dst[351:344] := e43 -dst[359:352] := e44 -dst[367:360] := e45 -dst[375:368] := e46 -dst[383:376] := e47 -dst[391:384] := e48 -dst[399:392] := e49 -dst[407:400] := e50 -dst[415:408] := e51 -dst[423:416] := e52 -dst[431:424] := e53 -dst[439:432] := e54 -dst[447:440] := e55 -dst[455:448] := e56 -dst[463:456] := e57 -dst[471:464] := e58 -dst[479:472] := e59 -dst[487:480] := e60 -dst[495:488] := e61 -dst[503:496] := e62 -dst[511:504] := e63 -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Set packed 16-bit integers in "dst" with the supplied values. - -dst[15:0] := e0 -dst[31:16] := e1 -dst[47:32] := e2 -dst[63:48] := e3 -dst[79:64] := e4 -dst[95:80] := e5 -dst[111:96] := e6 -dst[127:112] := e7 -dst[143:128] := e8 -dst[159:144] := e9 -dst[175:160] := e10 -dst[191:176] := e11 -dst[207:192] := e12 -dst[223:208] := e13 -dst[239:224] := e14 -dst[255:240] := e15 -dst[271:256] := e16 -dst[287:272] := e17 -dst[303:288] := e18 -dst[319:304] := e19 -dst[335:320] := e20 -dst[351:336] := e21 -dst[367:352] := e22 -dst[383:368] := e23 -dst[399:384] := e24 -dst[415:400] := e25 -dst[431:416] := e26 -dst[447:432] := e27 -dst[463:448] := e28 -dst[479:464] := e29 -dst[495:480] := e30 -dst[511:496] := e31 -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - Set packed 32-bit integers in "dst" with the supplied values. - -dst[31:0] := e0 -dst[63:32] := e1 -dst[95:64] := e2 -dst[127:96] := e3 -dst[159:128] := e4 -dst[191:160] := e5 -dst[223:192] := e6 -dst[255:224] := e7 -dst[287:256] := e8 -dst[319:288] := e9 -dst[351:320] := e10 -dst[383:352] := e11 -dst[415:384] := e12 -dst[447:416] := e13 -dst[479:448] := e14 -dst[511:480] := e15 -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - - - - - Set packed 64-bit integers in "dst" with the supplied values. - -dst[63:0] := e0 -dst[127:64] := e1 -dst[191:128] := e2 -dst[255:192] := e3 -dst[319:256] := e4 -dst[383:320] := e5 -dst[447:384] := e6 -dst[511:448] := e7 -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - - - - - Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values. - -dst[63:0] := e0 -dst[127:64] := e1 -dst[191:128] := e2 -dst[255:192] := e3 -dst[319:256] := e4 -dst[383:320] := e5 -dst[447:384] := e6 -dst[511:448] := e7 -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values. - -dst[31:0] := e0 -dst[63:32] := e1 -dst[95:64] := e2 -dst[127:96] := e3 -dst[159:128] := e4 -dst[191:160] := e5 -dst[223:192] := e6 -dst[255:224] := e7 -dst[287:256] := e8 -dst[319:288] := e9 -dst[351:320] := e10 -dst[383:352] := e11 -dst[415:384] := e12 -dst[447:416] := e13 -dst[479:448] := e14 -dst[511:480] := e15 -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - Set packed 32-bit integers in "dst" with the repeated 4 element sequence in reverse order. - -dst[31:0] := d -dst[63:32] := c -dst[95:64] := b -dst[127:96] := a -dst[159:128] := d -dst[191:160] := c -dst[223:192] := b -dst[255:224] := a -dst[287:256] := d -dst[319:288] := c -dst[351:320] := b -dst[383:352] := a -dst[415:384] := d -dst[447:416] := c -dst[479:448] := b -dst[511:480] := a -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - Set packed 64-bit integers in "dst" with the repeated 4 element sequence in reverse order. - -dst[63:0] := d -dst[127:64] := c -dst[191:128] := b -dst[255:192] := a -dst[319:256] := d -dst[383:320] := c -dst[447:384] := b -dst[511:448] := a -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - Set packed double-precision (64-bit) floating-point elements in "dst" with the repeated 4 element sequence in reverse order. - -dst[63:0] := d -dst[127:64] := c -dst[191:128] := b -dst[255:192] := a -dst[319:256] := d -dst[383:320] := c -dst[447:384] := b -dst[511:448] := a -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - Set packed single-precision (32-bit) floating-point elements in "dst" with the repeated 4 element sequence in reverse order. - -dst[31:0] := d -dst[63:32] := c -dst[95:64] := b -dst[127:96] := a -dst[159:128] := d -dst[191:160] := c -dst[223:192] := b -dst[255:224] := a -dst[287:256] := d -dst[319:288] := c -dst[351:320] := b -dst[383:352] := a -dst[415:384] := d -dst[447:416] := c -dst[479:448] := b -dst[511:480] := a -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - Set packed 32-bit integers in "dst" with the supplied values in reverse order. - -dst[31:0] := e15 -dst[63:32] := e14 -dst[95:64] := e13 -dst[127:96] := e12 -dst[159:128] := e11 -dst[191:160] := e10 -dst[223:192] := e9 -dst[255:224] := e8 -dst[287:256] := e7 -dst[319:288] := e6 -dst[351:320] := e5 -dst[383:352] := e4 -dst[415:384] := e3 -dst[447:416] := e2 -dst[479:448] := e1 -dst[511:480] := e0 -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - - - - - Set packed 64-bit integers in "dst" with the supplied values in reverse order. - -dst[63:0] := e7 -dst[127:64] := e6 -dst[191:128] := e5 -dst[255:192] := e4 -dst[319:256] := e3 -dst[383:320] := e2 -dst[447:384] := e1 -dst[511:448] := e0 -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - - - - - Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. - -dst[63:0] := e7 -dst[127:64] := e6 -dst[191:128] := e5 -dst[255:192] := e4 -dst[319:256] := e3 -dst[383:320] := e2 -dst[447:384] := e1 -dst[511:448] := e0 -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order. - -dst[31:0] := e15 -dst[63:32] := e14 -dst[95:64] := e13 -dst[127:96] := e12 -dst[159:128] := e11 -dst[191:160] := e10 -dst[223:192] := e9 -dst[255:224] := e8 -dst[287:256] := e7 -dst[319:288] := e6 -dst[351:320] := e5 -dst[383:352] := e4 -dst[415:384] := e3 -dst[447:416] := e2 -dst[479:448] := e1 -dst[511:480] := e0 -dst[MAX:512] := 0 - - AVX512F -
immintrin.h
- Set -
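
The difference between the `set` and `setr` entries above is argument order only: `set` lists elements from the highest lane down, while `setr` ("reverse") lists them from lane 0 up, matching memory order. Assuming the standard names, these two calls build the same vector:

```c
#include <immintrin.h>

/* Both hold 0,1,2,...,15 in lanes 0..15. */
__m512i iota_setr(void) {
    return _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
                             8, 9, 10, 11, 12, 13, 14, 15);
}

__m512i iota_set(void) {
    return _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8,
                            7, 6, 5, 4, 3, 2, 1, 0);
}
```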
- - - - Return vector of type __m512 with all elements set to zero. - -dst[MAX:0] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - Return vector of type __m512i with all elements set to zero. - -dst[MAX:0] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - Return vector of type __m512d with all elements set to zero. - -dst[MAX:0] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - Return vector of type __m512 with all elements set to zero. - -dst[MAX:0] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - Return vector of type __m512i with all elements set to zero. - -dst[MAX:0] := 0 - - - AVX512F -
immintrin.h
- Set -
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE LEFT_ROTATE_DWORDS(src, count_src) { - count := count_src % 32 - RETURN (src << count) OR (src >> (32 - count)) -} -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE LEFT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src << count) OR (src >> (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
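
The rotate entries come in an immediate form (`imm8`, one count for every lane) and a variable form (per-lane counts taken from "b"); in both, the count is reduced modulo the lane width, as the `% 32` / `% 64` in the pseudocode shows. Assuming the `_mm512_rol_epi32` / `_mm512_rolv_epi32` names:

```c
#include <immintrin.h>

/* Rotate every 32-bit lane left by 7; the count is a compile-time
   immediate and is taken mod 32. */
__m512i rol7(__m512i a) {
    return _mm512_rol_epi32(a, 7);
}

/* Rotate each lane by its own count, e.g. a vectorized hash or
   cipher round where rotation amounts differ per word. */
__m512i rol_per_lane(__m512i a, __m512i counts) {
    return _mm512_rolv_epi32(a, counts);
}
```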
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
-
-DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
-	count := count_src % 32
-	RETURN (src >> count) OR (src << (32 - count))
-}
-FOR j := 0 to 15
-	i := j*32
-	IF k[j]
-		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
-	ELSE
-		dst[i+31:i] := src[i+31:i]
-	FI
-ENDFOR
-dst[MAX:512] := 0
- - - AVX512F - 
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
-
-DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
-	count := count_src % 32
-	RETURN (src >> count) OR (src << (32 - count))
-}
-FOR j := 0 to 15
-	i := j*32
-	IF k[j]
-		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
-	ELSE
-		dst[i+31:i] := 0
-	FI
-ENDFOR
-dst[MAX:512] := 0
- - - AVX512F - 
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". 
-
-DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
-	count := count_src % 32
-	RETURN (src >> count) OR (src << (32 - count))
-}
-FOR j := 0 to 15
-	i := j*32
-	dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
-ENDFOR
-dst[MAX:512] := 0
- - - AVX512F - 
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst". - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
-
-DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
-	count := count_src % 32
-	RETURN (src >> count) OR (src << (32 - count))
-}
-FOR j := 0 to 15
-	i := j*32
-	IF k[j]
-		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
-	ELSE
-		dst[i+31:i] := src[i+31:i]
-	FI
-ENDFOR
-dst[MAX:512] := 0
- - - AVX512F - 
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". 
-
-DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
-	count := count_src % 32
-	RETURN (src >> count) OR (src << (32 - count))
-}
-FOR j := 0 to 15
-	i := j*32
-	dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
-ENDFOR
-dst[MAX:512] := 0
- - - AVX512F - 
immintrin.h
- Shift -
- - - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst". - -DEFINE RIGHT_ROTATE_QWORDS(src, count_src) { - count := count_src % 64 - RETURN (src >> count) OR (src << (64 - count)) -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
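
Note the out-of-range rule visible in the shift pseudocode: both the whole-register-count forms (`count`/`imm8`) and the per-lane variable forms produce 0 when the count exceeds the lane width, rather than reducing it modulo the width as the rotates do. Assuming the conventional `_mm512_slli_epi32` / `_mm512_sllv_epi64` names:

```c
#include <immintrin.h>

/* Same shift for all 32-bit lanes; a count > 31 would zero them. */
__m512i shl3_all(__m512i a) {
    return _mm512_slli_epi32(a, 3);
}

/* Per-lane shift counts; any lane whose count is >= 64 becomes 0. */
__m512i shl_per_lane(__m512i a, __m512i counts) {
    return _mm512_sllv_epi64(a, counts);
}
```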
- - - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF count[63:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF imm8[7:0] > 63 - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0) - ELSE - dst[i+63:i] := SignExtend64(a[i+63:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF count[i+63:i] < 64 - dst[i+63:i] := SignExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
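
The arithmetic right shifts replicate the sign bit, so an out-of-range count yields 0 or all-ones depending on the sign, i.e. it behaves like shifting by the lane width minus one. Assuming `_mm512_srai_epi32`, a common use:

```c
#include <immintrin.h>

/* Divide each signed 32-bit lane by 16, rounding toward negative
   infinity (sign bits shift in from the left). */
__m512i div16_floor(__m512i a) {
    return _mm512_srai_epi32(a, 4);
}
```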
- - - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF count[i+63:i] < 64 - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
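The three right-shift forms above differ only in where the count comes from: an 8-bit immediate shared by all lanes, the low 64 bits of a second vector shared by all lanes, or a per-lane count vector; any count at or above the element width yields zero. A minimal C sketch of the immediate and per-lane forms, assuming the usual immintrin.h names (_mm512_srli_epi64, _mm512_srlv_epi64) and a compiler flag such as -mavx512f:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi64(-1);              /* all bits set in every lane */
    /* Immediate form: every lane shifted right by the same count. */
    __m512i by_imm = _mm512_srli_epi64(a, 63);      /* each lane -> 1 */
    /* Per-lane form: lane j shifted by counts[j]; counts >= 64 give 0. */
    __m512i counts = _mm512_set_epi64(64, 63, 8, 4, 3, 2, 1, 0);
    __m512i by_var = _mm512_srlv_epi64(a, counts);
    long long imm[8], var[8];
    _mm512_storeu_si512(imm, by_imm);
    _mm512_storeu_si512(var, by_var);
    for (int j = 0; j < 8; j++)
        printf("lane %d: imm=%llx var=%llx\n", j,
               (unsigned long long)imm[j], (unsigned long long)var[j]);
    return 0;
}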
- - - - - - Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (1.0 / a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (1.0 / a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := (1.0 / a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (1.0 / a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (1.0 / a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := (1.0 / a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. - -IF k[0] - dst[63:0] := (1.0 / b[63:0]) -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. - -IF k[0] - dst[63:0] := (1.0 / b[63:0]) -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. - -dst[63:0] := (1.0 / b[63:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. - -IF k[0] - dst[31:0] := (1.0 / b[31:0]) -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. - -IF k[0] - dst[31:0] := (1.0 / b[31:0]) -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. - -dst[31:0] := (1.0 / b[31:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
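The rcp14 entries trade a full divide for a fast approximation whose relative error is bounded by 2^-14. A small sketch measuring that error, assuming the usual immintrin.h name _mm512_rcp14_pd:

#include <immintrin.h>
#include <math.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(3.0);
    __m512d approx = _mm512_rcp14_pd(a);    /* |relative error| < 2^-14 */
    double r[8];
    _mm512_storeu_pd(r, approx);
    double exact = 1.0 / 3.0;
    printf("approx=%.17g  rel_err=%.3g (bound %.3g)\n",
           r[0], fabs(r[0] - exact) / exact, pow(2.0, -14));
    return 0;
}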
- - - - - - Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := (1.0 / SQRT(a[i+63:i])) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14. - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := (1.0 / SQRT(a[i+31:i])) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. - -IF k[0] - dst[63:0] := (1.0 / SQRT(b[63:0])) -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. - -IF k[0] - dst[63:0] := (1.0 / SQRT(b[63:0])) -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14. - -dst[63:0] := (1.0 / SQRT(b[63:0])) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. - -IF k[0] - dst[31:0] := (1.0 / SQRT(b[31:0])) -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. - -IF k[0] - dst[31:0] := (1.0 / SQRT(b[31:0])) -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14. - -dst[31:0] := (1.0 / SQRT(b[31:0])) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
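Like rcp14, the rsqrt14 approximation is usually a seed: one Newton-Raphson iteration, y' = y*(1.5 - 0.5*x*y*y), roughly doubles the number of accurate bits. A sketch assuming the usual immintrin.h name _mm512_rsqrt14_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 x = _mm512_set1_ps(2.0f);
    __m512 y = _mm512_rsqrt14_ps(x);                /* ~14-bit 1/sqrt(x) */
    /* One Newton-Raphson refinement step: y = y * (1.5 - 0.5*x*y*y). */
    __m512 half_x = _mm512_mul_ps(x, _mm512_set1_ps(0.5f));
    __m512 yy     = _mm512_mul_ps(y, y);
    y = _mm512_mul_ps(y, _mm512_sub_ps(_mm512_set1_ps(1.5f),
                                       _mm512_mul_ps(half_x, yy)));
    float r[16];
    _mm512_storeu_ps(r, y);
    printf("refined 1/sqrt(2) ~ %.9f\n", r[0]);     /* ~0.707106781 */
    return 0;
}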
- - - - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := SQRT(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := SQRT(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := SQRT(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := SQRT(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := SQRT(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := SQRT(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := SQRT(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := SQRT(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := SQRT(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := SQRT(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := SQRT(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := SQRT(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - - Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - [round_note] - -IF k[0] - dst[63:0] := SQRT(b[63:0]) -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := SQRT(b[63:0]) -ELSE - dst[63:0] := src[63:0] -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - [round_note] - -IF k[0] - dst[63:0] := SQRT(b[63:0]) -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". - -IF k[0] - dst[63:0] := SQRT(b[63:0]) -ELSE - dst[63:0] := 0 -FI -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - [round_note] - -dst[63:0] := SQRT(b[63:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - - Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst[31:0] := SQRT(b[31:0]) -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := SQRT(b[31:0]) -ELSE - dst[31:0] := src[31:0] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - - Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst[31:0] := SQRT(b[31:0]) -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst[31:0] := SQRT(b[31:0]) -ELSE - dst[31:0] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := SQRT(b[31:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512F -
immintrin.h
- Elementary Math Functions -
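Unlike the 2^-14 approximations above, these square roots are correctly rounded; the _round variants additionally take a compile-time rounding immediate (for example _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) instead of reading MXCSR. A sketch of the writemask form, assuming the usual immintrin.h name _mm512_mask_sqrt_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a   = _mm512_set1_pd(16.0);
    __m512d src = _mm512_set1_pd(-1.0);  /* fallback value for masked-off lanes */
    __mmask8 k  = 0x0F;                  /* only lanes 0..3 are active */
    __m512d r   = _mm512_mask_sqrt_pd(src, k, a);
    double out[8];
    _mm512_storeu_pd(out, r);
    printf("lane0=%g lane7=%g\n", out[0], out[7]);  /* 4 and -1 */
    return 0;
}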
- - - - Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m512d to type __m128d. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m512 to type __m128. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m512d to type __m256d. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m512 to type __m256. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m512i to type __m128i. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m512i to type __m256i. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
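The practical difference between the two cast families above: the plain casts reinterpret a register and leave the widened upper bits unspecified, so they are free but only safe when those bits are overwritten or masked before use, while the zext variants guarantee zeroed upper bits. A sketch assuming the usual immintrin.h names (the zext spellings exist where the compiler provides them):

#include <immintrin.h>

/* Upper 384 bits undefined: free, but the caller must not read them. */
static inline __m512 widen_undef(__m128 lo) {
    return _mm512_castps128_ps512(lo);
}

/* Upper 384 bits guaranteed zero. */
static inline __m512 widen_zero(__m128 lo) {
    return _mm512_zextps128_ps512(lo);
}

int main(void) {
    __m128 lo = _mm_set1_ps(1.0f);
    __m512 z = widen_zero(lo);   /* lanes 4..15 read as 0.0f */
    __m512 u = widen_undef(lo);  /* lanes 4..15 must not be relied on */
    (void)z; (void)u;
    return 0;
}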
- - - - Return vector of type __m512 with undefined elements. - AVX512F -
immintrin.h
- General Support -
- - - Return vector of type __m512i with undefined elements. - AVX512F -
immintrin.h
- General Support -
- - - Return vector of type __m512d with undefined elements. - AVX512F -
immintrin.h
- General Support -
- - - Return vector of type __m512 with undefined elements. - AVX512F -
immintrin.h
- General Support -
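The undefined intrinsics return a register whose contents carry no guarantees; they serve as a don't-care source that is cheaper than zero-initialization when every lane is written before being read. A sketch, assuming the usual immintrin.h names (_mm512_undefined_ps, plus _mm512_insertf32x4 and _mm512_shuffle_f32x4 purely for illustration):

#include <immintrin.h>

/* Broadcast a 128-bit lane to all four 128-bit positions; the
   undefined upper lanes are fully overwritten by the shuffle. */
static __m512 splat128(__m128 v) {
    __m512 t = _mm512_undefined_ps();
    t = _mm512_insertf32x4(t, v, 0);
    return _mm512_shuffle_f32x4(t, t, 0x00);
}

int main(void) {
    __m512 r = splat128(_mm_set1_ps(2.0f));
    (void)r;
    return 0;
}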
- - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] + b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] + b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] + b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] + b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] + b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] + b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] + b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] + b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
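The writemask and zeromask conventions used throughout these entries are easiest to see side by side: active lanes get the computed sum, inactive lanes come from "src" (mask form) or become zero (maskz form). A sketch assuming the usual immintrin.h names _mm512_mask_add_ps and _mm512_maskz_add_ps:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 a   = _mm512_set1_ps(1.0f);
    __m512 b   = _mm512_set1_ps(2.0f);
    __m512 src = _mm512_set1_ps(9.0f);
    __mmask16 k = 0x00FF;                          /* low 8 lanes active */
    __m512 m  = _mm512_mask_add_ps(src, k, a, b);  /* inactive <- src */
    __m512 mz = _mm512_maskz_add_ps(k, a, b);      /* inactive <- 0 */
    float r1[16], r2[16];
    _mm512_storeu_ps(r1, m);
    _mm512_storeu_ps(r2, mz);
    printf("mask:  lane0=%g lane15=%g\n", r1[0], r1[15]);  /* 3 and 9 */
    printf("maskz: lane0=%g lane15=%g\n", r2[0], r2[15]);  /* 3 and 0 */
    return 0;
}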
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
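The two masked FMA shapes above differ in which input survives in inactive lanes: one form keeps "a" (the first operand), the other keeps "c" (the addend). A sketch assuming the usual immintrin.h names _mm512_mask_fmadd_pd and _mm512_mask3_fmadd_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(2.0);
    __m512d b = _mm512_set1_pd(3.0);
    __m512d c = _mm512_set1_pd(10.0);
    __mmask8 k = 0x0F;                                   /* lanes 0..3 active */
    __m512d keep_a = _mm512_mask_fmadd_pd(a, k, b, c);   /* inactive <- a */
    __m512d keep_c = _mm512_mask3_fmadd_pd(a, b, c, k);  /* inactive <- c */
    double o1[8], o2[8];
    _mm512_storeu_pd(o1, keep_a);
    _mm512_storeu_pd(o2, keep_c);
    printf("mask:  lane0=%g lane7=%g\n", o1[0], o1[7]);  /* 16 and 2 */
    printf("mask3: lane0=%g lane7=%g\n", o2[0], o2[7]);  /* 16 and 10 */
    return 0;
}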
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := c[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := c[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512F -
immintrin.h
- Arithmetic -
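Taken together, the four fused families differ only in the signs applied to the single-rounded product: fmadd computes a*b+c, fmsub computes a*b-c, fnmadd computes -(a*b)+c, and fnmsub computes -(a*b)-c. A sketch checking the sign conventions, assuming the usual immintrin.h names:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(2.0);
    __m512d b = _mm512_set1_pd(3.0);
    __m512d c = _mm512_set1_pd(1.0);
    printf("fmadd  %g\n", _mm512_cvtsd_f64(_mm512_fmadd_pd(a, b, c)));   /*  7 */
    printf("fmsub  %g\n", _mm512_cvtsd_f64(_mm512_fmsub_pd(a, b, c)));   /*  5 */
    printf("fnmadd %g\n", _mm512_cvtsd_f64(_mm512_fnmadd_pd(a, b, c)));  /* -5 */
    printf("fnmsub %g\n", _mm512_cvtsd_f64(_mm512_fnmsub_pd(a, b, c)));  /* -7 */
    return 0;
}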
- - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] * b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] * b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] * b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] * b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] * b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] * b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] * b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] * b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
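The [round_note] variants embed the rounding mode in the instruction instead of reading MXCSR; the immediate must be a compile-time constant, and the explicit modes are combined with _MM_FROUND_NO_EXC. A sketch assuming the usual immintrin.h name _mm512_mul_round_pd:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d x = _mm512_set1_pd(0.1);
    __m512d rn = _mm512_mul_round_pd(x, x,
                     _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    __m512d rz = _mm512_mul_round_pd(x, x,
                     _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    /* 0.1*0.1 is inexact, so the two modes may differ in the last ulp. */
    printf("nearest: %.20g\n", _mm512_cvtsd_f64(rn));
    printf("to-zero: %.20g\n", _mm512_cvtsd_f64(rz));
    return 0;
}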
- - - - - Add packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] + b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] + b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[63:0] := a[i+31:i] * b[i+31:i] - dst[i+31:i] := tmp[31:0] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst". - -FOR j := 0 to 15 - i := j*32 - tmp[63:0] := a[i+31:i] * b[i+31:i] - dst[i+31:i] := tmp[31:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
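The mullo entries form the full 64-bit product per lane and keep only its low 32 bits, i.e., multiplication modulo 2^32 (the same result bits for signed and unsigned inputs). A sketch assuming the usual immintrin.h name _mm512_mullo_epi32:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(0x10000);   /* 2^16 in every lane */
    __m512i r = _mm512_mullo_epi32(a, a);     /* 2^32: low 32 bits are 0 */
    int out[16];
    _mm512_storeu_si512(out, r);
    printf("lane0 = %d\n", out[0]);           /* 0: the product wrapped */
    return 0;
}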
- - - - - - - Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] - b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] - b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - [round_note] - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] - b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] - b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] - b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 32-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[31:0] + src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_ADD(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := 0 - FI -ENDFOR -dst[31:0] := REDUCE_ADD(tmp, 16) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 64-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[63:0] + src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_ADD(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := 0 - FI -ENDFOR -dst[63:0] := REDUCE_ADD(tmp, 8) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Reduce the packed double-precision (64-bit) floating-point elements in "a" by addition using mask "k". Returns the sum of all active elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[63:0] + src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_ADD(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := 0 - FI -ENDFOR -dst[63:0] := REDUCE_ADD(tmp, 8) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Reduce the packed single-precision (32-bit) floating-point elements in "a" by addition using mask "k". Returns the sum of all active elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[31:0] + src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_ADD(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := 0 - FI -ENDFOR -dst[31:0] := REDUCE_ADD(tmp, 16) - - AVX512F -
immintrin.h
- Arithmetic -
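The four masked-reduction entries above all follow the same pattern: inactive lanes are replaced by the operation's identity element (0 for addition) before a log2-depth tree reduction. Assuming the single-precision one maps to `_mm512_mask_reduce_add_ps`, usage looks like:

```c
// Build with: gcc -mavx512f -O2 reduce.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 a = _mm512_set1_ps(1.0f);

    // Only the low four lanes are active, so the masked sum is 4.0
    // even though all sixteen lanes hold 1.0.
    __mmask16 k = 0x000F;
    float sum = _mm512_mask_reduce_add_ps(k, a);
    printf("%f\n", sum);  // 4.0
    return 0;
}
```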
- - - - - Reduce the packed 32-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a". - -DEFINE REDUCE_MUL(src, len) { - IF len == 2 - RETURN src[31:0] * src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_MUL(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := 1 - FI -ENDFOR -dst[31:0] := REDUCE_MUL(tmp, 16) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Reduce the packed 64-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a". - -DEFINE REDUCE_MUL(src, len) { - IF len == 2 - RETURN src[63:0] * src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_MUL(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := 1 - FI -ENDFOR -dst[63:0] := REDUCE_MUL(tmp, 8) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Reduce the packed double-precision (64-bit) floating-point elements in "a" by multiplication using mask "k". Returns the product of all active elements in "a". - -DEFINE REDUCE_MUL(src, len) { - IF len == 2 - RETURN src[63:0] * src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_MUL(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := 1.0 - FI -ENDFOR -dst[63:0] := REDUCE_MUL(tmp, 8) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Reduce the packed single-precision (32-bit) floating-point elements in "a" by multiplication using mask "k". Returns the product of all active elements in "a". - -DEFINE REDUCE_MUL(src, len) { - IF len == 2 - RETURN src[31:0] * src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_MUL(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := FP32(1.0) - FI -ENDFOR -dst[31:0] := REDUCE_MUL(tmp, 16) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - Reduce the packed 32-bit integers in "a" by addition. Returns the sum of all elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[31:0] + src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_ADD(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_ADD(a, 16) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - Reduce the packed 64-bit integers in "a" by addition. Returns the sum of all elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[63:0] + src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_ADD(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_ADD(a, 8) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - Reduce the packed double-precision (64-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[63:0] + src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] + src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_ADD(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_ADD(a, 8) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - Reduce the packed single-precision (32-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". - -DEFINE REDUCE_ADD(src, len) { - IF len == 2 - RETURN src[31:0] + src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_ADD(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_ADD(a, 16) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - Reduce the packed 32-bit integers in "a" by multiplication. Returns the product of all elements in "a". - -DEFINE REDUCE_MUL(src, len) { - IF len == 2 - RETURN src[31:0] * src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_MUL(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_MUL(a, 16) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - Reduce the packed 64-bit integers in "a" by multiplication. Returns the product of all elements in "a". - -DEFINE REDUCE_MUL(src, len) { - IF len == 2 - RETURN src[63:0] * src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_MUL(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_MUL(a, 8) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - Reduce the packed double-precision (64-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". - -DEFINE REDUCE_MUL(src, len) { - IF len == 2 - RETURN src[63:0] * src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] * src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_MUL(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_MUL(a, 8) - - AVX512F -
immintrin.h
- Arithmetic -
- - - - Reduce the packed single-precision (32-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". - -DEFINE REDUCE_MUL(src, len) { - IF len == 2 - RETURN src[31:0] * src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] * src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_MUL(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_MUL(a, 16) - - AVX512F -
immintrin.h
- Arithmetic -
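The unmasked reductions skip the masking pass and feed "a" straight into the tree. A sketch assuming the 32-bit integer entries map to `_mm512_reduce_add_epi32` and `_mm512_reduce_mul_epi32` (both are compiler-synthesized sequences rather than single instructions):

```c
// Build with: gcc -mavx512f -O2 reduce_int.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(2);
    printf("sum     = %d\n", _mm512_reduce_add_epi32(a)); // 16 lanes * 2 = 32
    printf("product = %d\n", _mm512_reduce_mul_epi32(a)); // 2^16 = 65536
    return 0;
}
```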
- - - - Finds the absolute value of each packed single-precision (32-bit) floating-point element in "v2", storing the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ABS(v2[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Finds the absolute value of each packed single-precision (32-bit) floating-point element in "v2", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ABS(v2[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - Finds the absolute value of each packed double-precision (64-bit) floating-point element in "v2", storing the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ABS(v2[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Finds the absolute value of each packed double-precision (64-bit) floating-point element in "v2", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ABS(v2[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Arithmetic -
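The absolute-value entries are unusual in naming their operand "v2". A sketch assuming they map to `_mm512_abs_ps` / `_mm512_abs_pd`, which simply clear the sign bit of every lane:

```c
// Build with: gcc -mavx512f -O2 absps.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512 v2 = _mm512_set1_ps(-3.5f);
    __m512 r = _mm512_abs_ps(v2);  // clears the sign bit of every lane

    float out[16];
    _mm512_storeu_ps(out, r);
    printf("%f\n", out[0]);  // 3.5
    return 0;
}
```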
- - - - - - Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 64 bytes (16 elements) in "dst". - -temp[1023:512] := a[511:0] -temp[511:0] := b[511:0] -temp[1023:0] := temp[1023:0] >> (32*imm8[3:0]) -dst[511:0] := temp[511:0] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - - Concatenate "a" and "b" into a 128-byte immediate result, shift the result right by "imm8" 32-bit elements, and store the low 64 bytes (16 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -temp[1023:512] := a[511:0] -temp[511:0] := b[511:0] -temp[1023:0] := temp[1023:0] >> (32*imm8[3:0]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := temp[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
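The two entries above describe the 32-bit concatenate-and-shift operation (the instruction the guide documents as valignd). A sketch assuming the unmasked form is `_mm512_alignr_epi32`; note the shift count is an immediate and only imm8[3:0] participates:

```c
// Build with: gcc -mavx512f -O2 alignr.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(100);
    __m512i b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
                                  8, 9, 10, 11, 12, 13, 14, 15);

    // Concatenate a:b (a in the high half), shift right by 4 elements,
    // keep the low 16: lanes 0..11 come from b[4..15], lanes 12..15 from a.
    __m512i r = _mm512_alignr_epi32(a, b, 4);

    int out[16];
    _mm512_storeu_si512(out, r);
    printf("%d %d\n", out[0], out[12]);  // 4 100
    return 0;
}
```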
- - - - Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ConvertExpFP64(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - [sae_note] - FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ConvertExpFP64(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ConvertExpFP64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. - [sae_note] - FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ConvertExpFP64(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
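getexp computes floor(log2(|x|)) and returns it as a floating-point value, per the entries above. A sketch assuming the unmasked double-precision entry maps to `_mm512_getexp_pd`:

```c
// Build with: gcc -mavx512f -O2 getexp.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(24.0);   // 24 = 1.5 * 2^4
    __m512d e = _mm512_getexp_pd(a);    // floor(log2(24)) = 4.0

    double out[8];
    _mm512_storeu_pd(out, e);
    printf("%f\n", out[0]);  // 4.0
    return 0;
}
```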
- - - - Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ConvertExpFP32(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - [sae_note] - FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ConvertExpFP32(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ConvertExpFP32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. - [sae_note] - FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ConvertExpFP32(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. - [getmant_note] - FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. - [getmant_note][sae_note] - FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - - Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. - [getmant_note] - FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - - - Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. - [getmant_note][sae_note] - FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. - [getmant_note] - FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. - [getmant_note][sae_note] - FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - - Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. - [getmant_note] - FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
- - - - - - - - - Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign. - [getmant_note][sae_note] - FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Miscellaneous -
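getmant is the companion of getexp: it returns the normalized significand, with the target interval and sign handling selected by the "interv" and "sc" operands from the [getmant_note]. A sketch assuming the unmasked double-precision form is `_mm512_getmant_pd` with the standard enum values:

```c
// Build with: gcc -mavx512f -O2 getmant.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(24.0);  // 24 = 1.5 * 2^4

    // Normalize the mantissa into [1, 2), keeping the source sign.
    __m512d m = _mm512_getmant_pd(a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);

    double out[8];
    _mm512_storeu_pd(out, m);
    printf("%f\n", out[0]);  // 1.5, since getexp(24.0) is 4.0
    return 0;
}
```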
- - - - - - Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := b[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := b[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
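Blend is the pure select form of masking: no arithmetic, just a per-lane choice between "a" (mask bit clear) and "b" (mask bit set). A sketch assuming the 32-bit integer entry maps to `_mm512_mask_blend_epi32`:

```c
// Build with: gcc -mavx512f -O2 blend.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(0);
    __m512i b = _mm512_set1_epi32(1);

    __mmask16 k = 0xFF00;  // high eight lanes take "b"
    __m512i r = _mm512_mask_blend_epi32(k, a, b);

    int out[16];
    _mm512_storeu_si512(out, r);
    printf("%d %d\n", out[0], out[15]);  // 0 1
    return 0;
}
```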
- - - - - - - Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the "permutevar" name. This intrinsic is identical to "_mm512_mask_permutexvar_epi32", and it is recommended that you use that intrinsic name. - -FOR j := 0 to 15 - i := j*32 - id := idx[i+3:i]*32 - IF k[j] - dst[i+31:i] := a[id+31:id] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the "permutevar" name. This intrinsic is identical to "_mm512_permutexvar_epi32", and it is recommended that you use that intrinsic name. - -FOR j := 0 to 15 - i := j*32 - id := idx[i+3:i]*32 - dst[i+31:i] := a[id+31:id] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
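These two permutevar entries explicitly recommend the permutexvar spelling, so a sketch under the assumption that the unmasked one is `_mm512_permutexvar_epi32` (note the index vector comes first):

```c
// Build with: gcc -mavx512f -O2 perm.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a   = _mm512_setr_epi32(0, 10, 20, 30, 40, 50, 60, 70,
                                    80, 90, 100, 110, 120, 130, 140, 150);
    __m512i idx = _mm512_set1_epi32(15);  // every lane reads a[15]

    // Shuffles across the whole 512-bit register, not per 128-bit lane.
    __m512i r = _mm512_permutexvar_epi32(idx, a);

    int out[16];
    _mm512_storeu_si512(out, r);
    printf("%d\n", out[0]);  // 150
    return 0;
}
```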
- - - - - - - Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0]) -tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2]) -tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4]) -tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6]) -tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0]) -tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2]) -tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4]) -tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6]) -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := tmp_dst[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -dst[127:96] := SELECT4(a[127:0], imm8[7:6]) -dst[159:128] := SELECT4(a[255:128], imm8[1:0]) -dst[191:160] := SELECT4(a[255:128], imm8[3:2]) -dst[223:192] := SELECT4(a[255:128], imm8[5:4]) -dst[255:224] := SELECT4(a[255:128], imm8[7:6]) -dst[287:256] := SELECT4(a[383:256], imm8[1:0]) -dst[319:288] := SELECT4(a[383:256], imm8[3:2]) -dst[351:320] := SELECT4(a[383:256], imm8[5:4]) -dst[383:352] := SELECT4(a[383:256], imm8[7:6]) -dst[415:384] := SELECT4(a[511:384], imm8[1:0]) -dst[447:416] := SELECT4(a[511:384], imm8[3:2]) -dst[479:448] := SELECT4(a[511:384], imm8[5:4]) -dst[511:480] := SELECT4(a[511:384], imm8[7:6]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Swizzle -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 7 - i := j*64 - k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
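The imm8-driven float compares return a bitmask rather than a vector; the 32 predicate encodings tabulated above are the standard `_CMP_*` constants. A sketch assuming this entry maps to `_mm512_cmp_pd_mask`:

```c
// Build with: gcc -mavx512f -O2 cmppd.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    __m512d b = _mm512_set1_pd(4.0);

    // Ordered, signaling less-than: bits 0..3 end up set.
    __mmask8 k = _mm512_cmp_pd_mask(a, b, _CMP_LT_OS);
    printf("0x%02x\n", (unsigned)k);  // 0x0f
    return 0;
}
```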
- - - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 7 - i := j*64 - k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := (a[i+63:i] == b[i+63:i]) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := (a[i+63:i] <= b[i+63:i]) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := (a[i+63:i] < b[i+63:i]) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := (a[i+63:i] != b[i+63:i]) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := (!(a[i+63:i] <= b[i+63:i])) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k". - -FOR j := 0 to 7 - i := j*64 - k[j] := (!(a[i+63:i] < b[i+63:i])) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k". - FOR j := 0 to 7 - i := j*64 - k[j] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k". - FOR j := 0 to 7 - i := j*64 - k[j] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 1 : 0 -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := (a[i+63:i] == b[i+63:i]) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := (a[i+63:i] <= b[i+63:i]) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := (a[i+63:i] < b[i+63:i]) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := (a[i+63:i] != b[i+63:i]) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := (!(a[i+63:i] <= b[i+63:i])) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := (!(a[i+63:i] < b[i+63:i])) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - FOR j := 0 to 7 - i := j*64 - IF k1[j] - k[j] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512F -
immintrin.h
- Compare -
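The k1 zeromask variants make it cheap to AND a new predicate into an existing mask: lanes whose k1 bit is clear are simply forced to 0. A sketch that chains an ordered check with a less-than, assuming the names `_mm512_cmp_pd_mask` and `_mm512_mask_cmp_pd_mask`:

```c
// Build with: gcc -mavx512f -O2 chain.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d a = _mm512_set1_pd(1.0);
    __m512d b = _mm512_set1_pd(2.0);

    // First mask: lanes where neither input is NaN.
    __mmask8 ord = _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q);

    // Second compare runs "through" the first mask: the result is
    // (ordered AND less-than) per lane, without a separate mask AND.
    __mmask8 lt = _mm512_mask_cmp_pd_mask(ord, a, b, _CMP_LT_OQ);
    printf("0x%02x\n", (unsigned)lt);  // 0xff
    return 0;
}
```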
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 15 - i := j*32 - k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 15 - i := j*32 - k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := (a[i+31:i] == b[i+31:i]) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := (a[i+31:i] <= b[i+31:i]) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := (a[i+31:i] < b[i+31:i]) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := (a[i+31:i] != b[i+31:i]) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := (!(a[i+31:i] <= b[i+31:i])) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := (!(a[i+31:i] < b[i+31:i])) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k". - FOR j := 0 to 15 - i := j*32 - k[j] := ((a[i+31:i] != NaN) AND (b[i+31:i] != NaN)) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k". - FOR j := 0 to 15 - i := j*32 - k[j] := ((a[i+31:i] == NaN) OR (b[i+31:i] == NaN)) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := (a[i+31:i] == b[i+31:i]) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := (a[i+31:i] <= b[i+31:i]) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := (a[i+31:i] < b[i+31:i]) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := (a[i+31:i] != b[i+31:i]) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := (!(a[i+31:i] <= b[i+31:i])) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := (!(a[i+31:i] < b[i+31:i])) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ((a[i+31:i] != NaN) AND (b[i+31:i] != NaN)) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ((a[i+31:i] == NaN) OR (b[i+31:i] == NaN)) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
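For integers the predicate field is only three bits (the `_MM_CMPINT_*` table above); the named eq/ge/gt/le/ne entries are fixed-predicate shorthands for it. A sketch assuming `_mm512_cmp_epi32_mask`:

```c
// Build with: gcc -mavx512f -O2 cmpi.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
                                  8, 9, 10, 11, 12, 13, 14, 15);
    __m512i b = _mm512_set1_epi32(8);

    __mmask16 le = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LE);
    printf("0x%04x\n", (unsigned)le);  // 0x01ff (lanes 0..8)
    return 0;
}
```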
- - - - - - - Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k". - -FOR j := 0 to 15 - i := j*32 - k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
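The epu32 entries differ from the epi32 ones only in treating the lanes as unsigned, which matters exactly when the high bit is set. A sketch assuming the fixed-predicate names `_mm512_cmpgt_epi32_mask` and `_mm512_cmpgt_epu32_mask`:

```c
// Build with: gcc -mavx512f -O2 cmpu.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(-1);  // 0xFFFFFFFF == 4294967295 unsigned
    __m512i b = _mm512_set1_epi32(1);

    printf("signed   gt: 0x%04x\n",
           (unsigned)_mm512_cmpgt_epi32_mask(a, b));  // 0x0000 (-1 < 1)
    printf("unsigned gt: 0x%04x\n",
           (unsigned)_mm512_cmpgt_epu32_mask(a, b));  // 0xffff
    return 0;
}
```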
- - - - - - - Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[2:0]) OF -0: OP := _MM_CMPINT_EQ -1: OP := _MM_CMPINT_LT -2: OP := _MM_CMPINT_LE -3: OP := _MM_CMPINT_FALSE -4: OP := _MM_CMPINT_NE -5: OP := _MM_CMPINT_NLT -6: OP := _MM_CMPINT_NLE -7: OP := _MM_CMPINT_TRUE -ESAC -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] >= b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] > b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] <= b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] < b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Compare -
- - - - - - Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 15 - i := j*32 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - - - Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 15 - i := j*32 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
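The gather entries read sixteen independent addresses (base + sign-extended index × scale). A sketch assuming the unmasked/masked pair is `_mm512_i32gather_ps` / `_mm512_mask_i32gather_ps`; note that in the AVX512 gathers the index vector precedes the base pointer, and "scale" must be a literal 1, 2, 4 or 8:

```c
// Build with: gcc -mavx512f -O2 gather.c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    float table[64];
    for (int i = 0; i < 64; i++) table[i] = (float)i;

    // Gather table[0], table[3], table[6], ... (stride 3).
    __m512i idx = _mm512_setr_epi32(0, 3, 6, 9, 12, 15, 18, 21,
                                    24, 27, 30, 33, 36, 39, 42, 45);
    __m512 g = _mm512_i32gather_ps(idx, table, 4);  // scale 4 = sizeof(float)

    float out[16];
    _mm512_storeu_ps(out, g);
    printf("%f %f\n", out[1], out[15]);  // 3.0 45.0
    return 0;
}
```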
- - - - Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into "dst". - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into "dst". - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - Load 512-bits (composed of 16 packed 32-bit integers) from memory into "dst". - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - Load 512-bits of integer data from memory into "dst". - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
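The aligned-load entries above fault rather than degrade when "mem_addr" is not 64-byte aligned, so the buffer must carry the alignment in its definition. A minimal sketch, assuming the 512-bit integer load is _mm512_load_si512:

#include <immintrin.h>
#include <stdalign.h>
#include <stdint.h>

alignas(64) static int32_t data[16];   /* 64-byte alignment satisfies the load's requirement */

__m512i load_all(void) {
    return _mm512_load_si512(data);    /* could raise #GP if "data" were misaligned */
}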
- - - - - - Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - Load 512-bits (composed of 8 packed 64-bit integers) from memory into "dst". - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -dst[511:0] := MEM[mem_addr+511:mem_addr] -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 15 - i := j*32 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - - - Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 15 - i := j*32 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+31:i] := MEM[addr+31:addr] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Loads 8 64-bit integer elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" and stores them in "dst". - -FOR j := 0 to 7 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - - - Loads 8 64-bit integer elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Loads 8 double-precision (64-bit) floating-point elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale", and stores them in "dst". - -FOR j := 0 to 7 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - - - Loads 8 double-precision (64-bit) floating-point elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - dst[i+63:i] := MEM[addr+63:addr] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Load -
- - - - - - Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - - Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - - Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
- - - - - - Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Move -
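The masked-move entries above are per-lane blends: lanes whose mask bit is set come from "a", the rest from "src". A sketch assuming the single-precision form is _mm512_mask_mov_ps:

#include <immintrin.h>

/* Select a's lane where the mask bit is set, src's lane otherwise. */
__m512 blend_lanes(__m512 src, __m512 a, __mmask16 k) {
    return _mm512_mask_mov_ps(src, k, a);
}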
- - - - - - Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k". - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
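Masked stores write only the selected lanes and leave the rest of the destination memory untouched, which makes them useful for tails shorter than a full vector. A sketch assuming the entry above is _mm512_mask_store_pd (the 64-byte alignment requirement still applies):

#include <immintrin.h>
#include <stdalign.h>

alignas(64) double out[8];

void store_lower_half(__m512d v) {
    _mm512_mask_store_pd(out, (__mmask8)0x0F, v);  /* lanes 0-3 written; out[4..7] untouched */
}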
- - - - - Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory. - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - - Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k". - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory. - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - - Store packed 32-bit integers from "a" into memory using writemask "k". - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 15 - i := j*32 - IF k[j] - MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits (composed of 16 packed 32-bit integers) from "a" into memory. - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits of integer data from "a" into memory. - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - - Store packed 64-bit integers from "a" into memory using writemask "k". - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -FOR j := 0 to 7 - i := j*64 - IF k[j] - MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - Store 512-bits (composed of 8 packed 64-bit integers) from "a" into memory. - "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+511:mem_addr] := a[511:0] - - - AVX512F -
immintrin.h
- Store -
- - - - - - - Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 15 - i := j*32 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] -ENDFOR - - - AVX512F -
immintrin.h
- Store -
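A scatter sketch for the 32-bit integer entry above, assuming it is _mm512_i32scatter_epi32 (name inferred). When two lanes carry the same index, the stores overlap and the element from the higher lane wins, per the in-order loop above:

#include <immintrin.h>
#include <stdint.h>

int32_t bucket[256];

/* Each lane of "vals" lands at bucket[idx[lane]]; scale 4 = sizeof(int32_t). */
void scatter_values(__m512i idx, __m512i vals) {
    _mm512_i32scatter_epi32(bucket, idx, vals, 4);
}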
- - - - - - - - Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 15 - i := j*32 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 15 - i := j*32 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - - Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8. - -FOR j := 0 to 15 - i := j*32 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+31:addr] := a[i+31:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - Stores 8 packed double-precision (64-bit) floating-point elements in "a" to memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". - -FOR j := 0 to 7 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - - Stores 8 packed double-precision (64-bit) floating-point elements in "a" to memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". Only those elements whose corresponding mask bit is set in writemask "k" are written to memory. - -FOR j := 0 to 7 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] AND b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 512 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[511:0] := (a[511:0] AND b[511:0]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
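The NOT-then-AND form described below is the usual bit-clearing primitive: the first operand is inverted, so passing the mask first clears exactly its bits. A sketch assuming the 512-bit forms are _mm512_and_si512 and _mm512_andnot_si512:

#include <immintrin.h>

/* Clear in "v" the bits selected by "mask": (NOT mask) AND v. */
__m512i clear_bits(__m512i v, __m512i mask) {
    return _mm512_andnot_si512(mask, v);
}

/* Keep in "v" only the bits selected by "mask". */
__m512i keep_bits(__m512i v, __m512i mask) {
    return _mm512_and_si512(v, mask);
}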
- - - - - Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise NOT of 512 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". - -dst[511:0] := ((NOT a[511:0]) AND b[511:0]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in "a" and then AND with "b", and store the results in "dst". - -dst[511:0] := ((NOT a[511:0]) AND b[511:0]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in "a" and "b", and store the results in "dst". - -dst[511:0] := (a[511:0] AND b[511:0]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] AND b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] OR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] OR b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of 512 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[511:0] := (a[511:0] OR b[511:0]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] OR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] OR b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero. - -FOR j := 0 to 15 - i := j*32 - IF k1[j] - k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero. - -FOR j := 0 to 15 - i := j*32 - k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0 -ENDFOR -k[MAX:16] := 0 - - - AVX512F -
immintrin.h
- Logical -
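The test entries reduce each AND lane to a single mask bit, avoiding a separate compare against zero. A sketch assuming the unmasked form is _mm512_test_epi32_mask:

#include <immintrin.h>

/* One mask bit per lane, set when the chosen flag bit is present in that lane. */
__mmask16 lanes_with_flag(__m512i words, int flag_bit) {
    __m512i flag = _mm512_set1_epi32(1 << flag_bit);
    return _mm512_test_epi32_mask(words, flag);
}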
- - - - - - - Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] XOR b[i+31:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of 512 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[511:0] := (a[511:0] XOR b[511:0]) -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - Reduce the packed 32-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a". - -DEFINE REDUCE_AND(src, len) { - IF len == 2 - RETURN src[31:0] AND src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] AND src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_AND(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := 0xFFFFFFFF - FI -ENDFOR -dst[31:0] := REDUCE_AND(tmp, 16) - - AVX512F -
immintrin.h
- Logical -
- - - - - Reduce the packed 64-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a". - -DEFINE REDUCE_AND(src, len) { - IF len == 2 - RETURN src[63:0] AND src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] AND src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_AND(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := 0xFFFFFFFFFFFFFFFF - FI -ENDFOR -dst[63:0] := REDUCE_AND(tmp, 8) - - AVX512F -
immintrin.h
- Logical -
- - - - - Reduce the packed 32-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a". - -DEFINE REDUCE_OR(src, len) { - IF len == 2 - RETURN src[31:0] OR src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] OR src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_OR(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := 0 - FI -ENDFOR -dst[31:0] := REDUCE_OR(tmp, 16) - - AVX512F -
immintrin.h
- Logical -
- - - - - Reduce the packed 64-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a". - -DEFINE REDUCE_OR(src, len) { - IF len == 2 - RETURN src[63:0] OR src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] OR src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_OR(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := 0 - FI -ENDFOR -dst[63:0] := REDUCE_OR(tmp, 8) - - AVX512F -
immintrin.h
- Logical -
- - - - Reduce the packed 32-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a". - -DEFINE REDUCE_AND(src, len) { - IF len == 2 - RETURN src[31:0] AND src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] AND src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_AND(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_AND(a, 16) - - AVX512F -
immintrin.h
- Logical -
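The reduce entries fold all lanes into one scalar with the log2 halving scheme shown in REDUCE_AND; the masked variants above first force inactive lanes to the operation's identity (all-ones for AND, zero for OR). A sketch assuming the names _mm512_reduce_and_epi32 and _mm512_mask_reduce_and_epi32:

#include <immintrin.h>

int all_lanes_and(__m512i v) {
    return _mm512_reduce_and_epi32(v);           /* horizontal AND of all 16 lanes */
}

int active_lanes_and(__mmask16 k, __m512i v) {
    return _mm512_mask_reduce_and_epi32(k, v);   /* inactive lanes read as 0xFFFFFFFF */
}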
- - - - Reduce the packed 64-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a". - -DEFINE REDUCE_AND(src, len) { - IF len == 2 - RETURN src[63:0] AND src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] AND src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_AND(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_AND(a, 8) - - AVX512F -
immintrin.h
- Logical -
- - - - Reduce the packed 32-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a". - -DEFINE REDUCE_OR(src, len) { - IF len == 2 - RETURN src[31:0] OR src[63:32] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := src[i+31:i] OR src[i+32*len+31:i+32*len] - ENDFOR - RETURN REDUCE_OR(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_OR(a, 16) - - AVX512F -
immintrin.h
- Logical -
- - - - Reduce the packed 64-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a". - -DEFINE REDUCE_OR(src, len) { - IF len == 2 - RETURN src[63:0] OR src[127:64] - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := src[i+63:i] OR src[i+64*len+63:i+64*len] - ENDFOR - RETURN REDUCE_OR(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_OR(a, 8) - - AVX512F -
immintrin.h
- Logical -
- - - - - - - Performs element-by-element bitwise AND between packed 32-bit integer elements of "v2" and "v3", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := v2[i+31:i] AND v3[i+31:i] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Logical -
- - - - - - - Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Special Math Functions -
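A sketch for the per-lane signed maximum above, assuming the unmasked form is _mm512_max_epi32; clamping against a broadcast constant is the typical use:

#include <immintrin.h>

/* Clamp every signed 32-bit lane to at least "lo". */
__m512i clamp_lower(__m512i v, int lo) {
    return _mm512_max_epi32(v, _mm512_set1_epi32(lo));
}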
- - - - - - - Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - - - Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - - - Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 32-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MAX(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := Int32(-0x80000000) - FI -ENDFOR -dst[31:0] := REDUCE_MAX(tmp, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 64-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MAX(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := Int64(-0x8000000000000000) - FI -ENDFOR -dst[63:0] := REDUCE_MAX(tmp, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 32-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MAX(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := 0 - FI -ENDFOR -dst[31:0] := REDUCE_MAX(tmp, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 64-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MAX(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := 0 - FI -ENDFOR -dst[63:0] := REDUCE_MAX(tmp, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed double-precision (64-bit) floating-point elements in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MAX(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := Cast_FP64(0xFFEFFFFFFFFFFFFF) - FI -ENDFOR -dst[63:0] := REDUCE_MAX(tmp, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed single-precision (32-bit) floating-point elements in "a" by maximum using mask "k". Returns the maximum of all active elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MAX(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := Cast_FP32(0xFF7FFFFF) - FI -ENDFOR -dst[31:0] := REDUCE_MAX(tmp, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 32-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MIN(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := Int32(0x7FFFFFFF) - FI -ENDFOR -dst[31:0] := REDUCE_MIN(tmp, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed signed 64-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MIN(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := Int64(0x7FFFFFFFFFFFFFFF) - FI -ENDFOR -dst[63:0] := REDUCE_MIN(tmp, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 32-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MIN(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := 0xFFFFFFFF - FI -ENDFOR -dst[31:0] := REDUCE_MIN(tmp, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed unsigned 64-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MIN(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := 0xFFFFFFFFFFFFFFFF - FI -ENDFOR -dst[63:0] := REDUCE_MIN(tmp, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed double-precision (64-bit) floating-point elements in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". [min_float_note] - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MIN(src[64*len-1:0], len) -} -tmp := a -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[i+63:i] := a[i+63:i] - ELSE - tmp[i+63:i] := Cast_FP64(0x7FEFFFFFFFFFFFFF) - FI -ENDFOR -dst[63:0] := REDUCE_MIN(tmp, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - Reduce the packed single-precision (32-bit) floating-point elements in "a" by minimum using mask "k". Returns the minimum of all active elements in "a". [min_float_note] - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MIN(src[32*len-1:0], len) -} -tmp := a -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[i+31:i] := a[i+31:i] - ELSE - tmp[i+31:i] := Cast_FP32(0x7F7FFFFF) - FI -ENDFOR -dst[31:0] := REDUCE_MIN(tmp, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed signed 32-bit integers in "a" by maximum. Returns the maximum of all elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MAX(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_MAX(a, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed signed 64-bit integers in "a" by maximum. Returns the maximum of all elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MAX(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_MAX(a, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 32-bit integers in "a" by maximum. Returns the maximum of all elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MAX(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_MAX(a, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 64-bit integers in "a" by maximum. Returns the maximum of all elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MAX(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_MAX(a, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed double-precision (64-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[63:0] > src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] > src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MAX(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_MAX(a, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed single-precision (32-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". - -DEFINE REDUCE_MAX(src, len) { - IF len == 2 - RETURN (src[31:0] > src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] > src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MAX(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_MAX(a, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed signed 32-bit integers in "a" by minimum. Returns the minimum of all elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MIN(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_MIN(a, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed signed 64-bit integers in "a" by minimum. Returns the minimum of all elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MIN(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_MIN(a, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 32-bit integers in "a" by minimum. Returns the minimum of all elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MIN(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_MIN(a, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed unsigned 64-bit integers in "a" by minimum. Returns the minimum of all elements in "a". - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MIN(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_MIN(a, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed double-precision (64-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". [min_float_note] - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[63:0] < src[127:64] ? src[63:0] : src[127:64]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*64 - src[i+63:i] := (src[i+63:i] < src[i+64*len+63:i+64*len] ? src[i+63:i] : src[i+64*len+63:i+64*len]) - ENDFOR - RETURN REDUCE_MIN(src[64*len-1:0], len) -} -dst[63:0] := REDUCE_MIN(a, 8) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - Reduce the packed single-precision (32-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". [min_float_note] - -DEFINE REDUCE_MIN(src, len) { - IF len == 2 - RETURN (src[31:0] < src[63:32] ? src[31:0] : src[63:32]) - FI - len := len / 2 - FOR j:= 0 to (len-1) - i := j*32 - src[i+31:i] := (src[i+31:i] < src[i+32*len+31:i+32*len] ? src[i+31:i] : src[i+32*len+31:i+32*len]) - ENDFOR - RETURN REDUCE_MIN(src[32*len-1:0], len) -} -dst[31:0] := REDUCE_MIN(a, 16) - - AVX512F -
immintrin.h
- Special Math Functions -
- - - - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
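Immediate shifts take the count as a compile-time constant, while the variable forms further below take one count per lane. A sketch assuming the names _mm512_slli_epi32 and _mm512_sllv_epi32:

#include <immintrin.h>

/* Multiply every lane by 16 with a constant 4-bit left shift. */
__m512i times16(__m512i v) {
    return _mm512_slli_epi32(v, 4);
}

/* Each lane of "counts" shifts its own lane of "v"; counts >= 32 produce 0. */
__m512i shl_per_lane(__m512i v, __m512i counts) {
    return _mm512_sllv_epi32(v, counts);
}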
- - - - - - - Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF count[i+31:i] < 32 - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 15 - i := j*32 - IF count[i+31:i] < 32 - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Shift -
- - - - Cast vector of type __m512d to type __m512. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
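Because the casts here generate no instructions, they are the standard way to do bit-level tricks on floating-point data. A sketch assuming the names _mm512_castpd_si512 and _mm512_castsi512_pd:

#include <immintrin.h>

/* Negate 8 doubles by flipping their sign bits in the integer domain.
   The two casts are free; only the XOR costs an instruction. */
__m512d negate_pd(__m512d v) {
    __m512i bits = _mm512_castpd_si512(v);
    bits = _mm512_xor_si512(bits, _mm512_set1_epi64((long long)0x8000000000000000ULL));
    return _mm512_castsi512_pd(bits);
}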
- - - - Cast vector of type __m512d to type __m512i. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m512 to type __m512d. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m512 to type __m512i. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m512i to type __m512d. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Cast vector of type __m512i to type __m512. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - AVX512F -
immintrin.h
- Cast -
- - - - Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst". - -FOR j := 0 to 7 - i := j*32 - n := j*64 - dst[n+63:n] := Convert_FP32_To_FP64(v2[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - l := j*64 - IF k[j] - dst[l+63:l] := Convert_FP32_To_FP64(v2[i+31:i]) - ELSE - dst[l+63:l] := src[l+63:l] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Performs element-by-element conversion of the lower half of packed 32-bit integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst". - -FOR j := 0 to 7 - i := j*32 - l := j*64 - dst[l+63:l] := Convert_Int32_To_FP64(v2[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
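The lower-half conversions here widen 8 of the 16 32-bit lanes into a full vector of doubles. The entry above presumably corresponds to _mm512_cvtepi32lo_pd (name inferred); the same operation spelled through the 256-bit type, using intrinsics that certainly exist, looks like this:

#include <immintrin.h>

/* Widen the low 8 signed 32-bit lanes of "v" to 8 doubles. */
__m512d widen_lo(__m512i v) {
    return _mm512_cvtepi32_pd(_mm512_castsi512_si256(v));
}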
- - - - - - Performs element-by-element conversion of the lower half of packed 32-bit integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - n := j*64 - IF k[j] - dst[n+63:n] := Convert_Int32_To_FP64(v2[i+31:i]) - ELSE - dst[n+63:n] := src[n+63:n] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst". - -FOR j := 0 to 7 - i := j*32 - n := j*64 - dst[n+63:n] := Convert_UInt32_To_FP64(v2[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - l := j*64 - IF k[j] - dst[l+63:l] := Convert_UInt32_To_FP64(v2[i+31:i]) - ELSE - dst[l+63:l] := src[l+63:l] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to single-precision (32-bit) floating-point elements and stores them in "dst". The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. - -FOR j := 0 to 7 - i := j*64 - k := j*32 - dst[k+31:k] := Convert_FP64_To_FP32(v2[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to single-precision (32-bit) floating-point elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. - -FOR j := 0 to 7 - i := j*64 - l := j*32 - IF k[j] - dst[l+31:l] := Convert_FP64_To_FP32(v2[i+63:i]) - ELSE - dst[l+31:l] := src[l+31:l] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512F -
immintrin.h
- Convert -
- - - - - - - Takes 8 packed 64-bit integer elements from "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". - -FOR j := 0 to 7 - i := j*64 - m := j*32 - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - - Takes 8 packed 64-bit integer elements from "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using writemask "k" (elements whose corresponding mask bit is not set are not written to memory). - -FOR j := 0 to 7 - i := j*64 - m := j*32 - IF k[j] - addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 - MEM[addr+63:addr] := a[i+63:i] - FI -ENDFOR - - - AVX512F -
immintrin.h
- Store -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
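The IFMA52 entries split a 52x52-to-104-bit multiply into low and high halves accumulated into 64-bit lanes, the building block for big-integer limb arithmetic. A sketch assuming the 256-bit low-half form is _mm256_madd52lo_epu64 (requires AVX512IFMA52 and AVX512VL):

#include <immintrin.h>

/* acc + low 52 bits of (b * c), per 64-bit lane; b and c must hold values below 2^52. */
__m256i madd_lo(__m256i acc, __m256i b, __m256i c) {
    return _mm256_madd52lo_epu64(acc, b, c);
}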
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512IFMA52 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512IFMA52 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512IFMA52 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512IFMA52 -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*64 - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512IFMA52 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512IFMA52 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i]) - dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512IFMA52 -
immintrin.h
- Arithmetic -
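The low and high forms above are meant to be used together: one 52x52-bit multiply yields a 104-bit product whose halves land in separate 64-bit accumulators. A minimal C sketch of that pairing, assuming the conventional intrinsic names _mm512_madd52lo_epu64/_mm512_madd52hi_epu64 for the unmasked 512-bit forms (compile with e.g. -mavx512ifma):

#include <immintrin.h>
#include <stdint.h>

/* Accumulate the 104-bit products of eight pairs of 52-bit limbs,
   low halves into lo[], high halves into hi[], as used in
   multi-precision multiplication. */
void madd52_step(uint64_t lo[8], uint64_t hi[8],
                 const uint64_t b[8], const uint64_t c[8])
{
    __m512i vlo = _mm512_loadu_si512(lo);
    __m512i vhi = _mm512_loadu_si512(hi);
    __m512i vb  = _mm512_loadu_si512(b);
    __m512i vc  = _mm512_loadu_si512(c);
    vlo = _mm512_madd52lo_epu64(vlo, vb, vc); /* += bits 51:0 of b*c   */
    vhi = _mm512_madd52hi_epu64(vhi, vb, vc); /* += bits 103:52 of b*c */
    _mm512_storeu_si512(lo, vlo);
    _mm512_storeu_si512(hi, vhi);
}

Per the pseudocode above, only bits 51:0 of each lane of "b" and "c" participate in the multiply; the accumulator "a" is a full 64-bit addend.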
- - - - - - - Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := POPCNT(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := POPCNT(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := POPCNT(a[i+63:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := POPCNT(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := POPCNT(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := POPCNT(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := POPCNT(a[i+31:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := POPCNT(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := POPCNT(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := POPCNT(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := POPCNT(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := POPCNT(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512VPOPCNTDQ - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := POPCNT(a[i+31:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512VPOPCNTDQ -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := POPCNT(a[i+31:i]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512VPOPCNTDQ -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := POPCNT(a[i+31:i]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512VPOPCNTDQ -
immintrin.h
- Bit Manipulation -
- - - - Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := POPCNT(a[i+63:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512VPOPCNTDQ -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := POPCNT(a[i+63:i]) - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512VPOPCNTDQ -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := POPCNT(a[i+63:i]) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512VPOPCNTDQ -
immintrin.h
- Bit Manipulation -
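A usage sketch for the per-lane population counts above, assuming the conventional names _mm512_popcnt_epi64 (AVX512VPOPCNTDQ) and _mm512_reduce_add_epi64 (AVX-512F); n is kept a multiple of 8 to keep the sketch short:

#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

/* Total number of set bits in an array of 64-bit words. */
uint64_t total_popcount(const uint64_t *p, size_t n)
{
    __m512i acc = _mm512_setzero_si512();
    for (size_t i = 0; i < n; i += 8) {
        __m512i v = _mm512_loadu_si512(p + i);
        acc = _mm512_add_epi64(acc, _mm512_popcnt_epi64(v)); /* per-lane counts */
    }
    return (uint64_t)_mm512_reduce_add_epi64(acc); /* horizontal sum */
}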
- - - - - - Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. - -FOR j := 0 to 15 - i := j*32 - m := j*16 - dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) -ENDFOR -dst[MAX:512] := 0 - - AVX512_BF16 - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. - -FOR j := 0 to 15 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512_BF16 - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. - -FOR j := 0 to 15 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512_BF16 - AVX512F -
immintrin.h
- Convert -
- - - - Convert the BF16 (16-bit) floating-point element in "a" to a floating-point element, and store the result in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. - -dst[31:0] := Convert_BF16_To_FP32(a[15:0]) - - AVX512_BF16 - AVX512F -
Convert the BF16 (16-bit) floating-point element in "a" to a single-precision (32-bit) floating-point element, and store the result in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN.
dst[31:0] := Convert_BF16_To_FP32(a[15:0])
AVX512_BF16
AVX512F
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst". - -FOR j := 0 to 31 - IF j < 16 - t := b.fp32[j] - ELSE - t := a.fp32[j-16] - FI - dst.word[j] := Convert_FP32_To_BF16(t) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BF16 - AVX512F -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - IF j < 16 - t := b.fp32[j] - ELSE - t := a.fp32[j-16] - FI - dst.word[j] := Convert_FP32_To_BF16(t) - ELSE - dst.word[j] := src.word[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BF16 - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - IF j < 16 - t := b.fp32[j] - ELSE - t := a.fp32[j-16] - FI - dst.word[j] := Convert_FP32_To_BF16(t) - ELSE - dst.word[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BF16 - AVX512F -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 15 - dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BF16 - AVX512F -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) - ELSE - dst.word[j] := src.word[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BF16 - AVX512F -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) - ELSE - dst.word[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BF16 - AVX512F -
immintrin.h
- Convert -
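The conversion entries above compose into a simple round trip, and they are asymmetric in exactly the way the descriptions spell out: FP32 to BF16 rounds (to nearest even), while BF16 to FP32 is exact and raises no exceptions. A sketch, assuming the conventional names _mm512_cvtneps_pbh/_mm512_cvtpbh_ps and the __m256bh packed-BF16 type (compile with e.g. -mavx512bf16):

#include <immintrin.h>

/* Narrow 16 FP32 lanes to BF16 and widen them back; the result
   equals the input with the low 16 mantissa bits rounded away. */
__m512 bf16_roundtrip(__m512 x)
{
    __m256bh bh = _mm512_cvtneps_pbh(x); /* FP32 -> BF16, round-to-nearest-even */
    return _mm512_cvtpbh_ps(bh);         /* BF16 -> FP32, exact */
}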
- - - - - - Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst". - -DEFINE make_fp32(x[15:0]) { - y.fp32 := 0.0 - y[31:16] := x[15:0] - RETURN y -} -dst := src -FOR j := 0 to 15 - dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) - dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BF16 - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE make_fp32(x[15:0]) { - y.fp32 := 0.0 - y[31:16] := x[15:0] - RETURN y -} -dst := src -FOR j := 0 to 15 - IF k[j] - dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) - dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BF16 - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE make_fp32(x[15:0]) { - y.fp32 := 0.0 - y[31:16] := x[15:0] - RETURN y -} -dst := src -FOR j := 0 to 15 - IF k[j] - dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) - dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BF16 - AVX512F -
immintrin.h
- Arithmetic -
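The dot-product entries above pair naturally with the two-vector conversion: convert 32 FP32 values per operand, then accumulate 16 FP32 sums. A sketch with the conventional names _mm512_cvtne2ps_pbh and _mm512_dpbf16_ps assumed; note the argument order of the two-vector convert, whose second argument fills the low half of the result per the pseudocode above:

#include <immintrin.h>

/* One step of a BF16 dot product: for each j,
   acc.fp32[j] += a.bf16[2j+1]*b.bf16[2j+1] + a.bf16[2j]*b.bf16[2j]. */
__m512 bf16_dot_step(__m512 acc,
                     __m512 a_hi, __m512 a_lo,
                     __m512 b_hi, __m512 b_lo)
{
    __m512bh a = _mm512_cvtne2ps_pbh(a_hi, a_lo); /* a_lo -> lanes 0..15 */
    __m512bh b = _mm512_cvtne2ps_pbh(b_hi, b_lo);
    return _mm512_dpbf16_ps(acc, a, b);
}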
- - - - - - Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. - -FOR j := 0 to 3 - i := j*32 - m := j*16 - dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) -ENDFOR -dst[MAX:128] := 0 - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. - -FOR j := 0 to 3 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. - -FOR j := 0 to 3 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. - -FOR j := 0 to 7 - i := j*32 - m := j*16 - dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) -ENDFOR -dst[MAX:256] := 0 - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. - -FOR j := 0 to 7 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed BF16 (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic neither raises any floating point exceptions nor turns sNAN into qNAN. - -FOR j := 0 to 7 - i := j*32 - m := j*16 - IF k[j] - dst[i+31:i] := Convert_BF16_To_FP32(a[m+15:m]) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - Convert the single-precision (32-bit) floating-point element in "a" to a BF16 (16-bit) floating-point element, and store the result in "dst". - -dst[15:0] := Convert_FP32_To_BF16(a[31:0]) - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst". - -FOR j := 0 to 7 - IF j < 4 - t := b.fp32[j] - ELSE - t := a.fp32[j-4] - FI - dst.word[j] := Convert_FP32_To_BF16(t) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - IF j < 4 - t := b.fp32[j] - ELSE - t := a.fp32[j-4] - FI - dst.word[j] := Convert_FP32_To_BF16(t) - ELSE - dst.word[j] := src.word[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - IF j < 4 - t := b.fp32[j] - ELSE - t := a.fp32[j-4] - FI - dst.word[j] := Convert_FP32_To_BF16(t) - ELSE - dst.word[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst". - -FOR j := 0 to 15 - IF j < 8 - t := b.fp32[j] - ELSE - t := a.fp32[j-8] - FI - dst.word[j] := Convert_FP32_To_BF16(t) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - IF j < 8 - t := b.fp32[j] - ELSE - t := a.fp32[j-8] - FI - dst.word[j] := Convert_FP32_To_BF16(t) - ELSE - dst.word[j] := src.word[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - IF j < 8 - t := b.fp32[j] - ELSE - t := a.fp32[j-8] - FI - dst.word[j] := Convert_FP32_To_BF16(t) - ELSE - dst.word[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 3 - dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) - ELSE - dst.word[j] := src.word[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) - ELSE - dst.word[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) - ELSE - dst.word[j] := src.word[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) - ELSE - dst.word[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Convert -
- - - - - - Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst". - -DEFINE make_fp32(x[15:0]) { - y.fp32 := 0.0 - y[31:16] := x[15:0] - RETURN y -} -dst := src -FOR j := 0 to 3 - dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) - dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE make_fp32(x[15:0]) { - y.fp32 := 0.0 - y[31:16] := x[15:0] - RETURN y -} -dst := src -FOR j := 0 to 3 - IF k[j] - dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) - dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE make_fp32(x[15:0]) { - y.fp32 := 0.0 - y[31:16] := x[15:0] - RETURN y -} -dst := src -FOR j := 0 to 3 - IF k[j] - dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) - dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst". - -DEFINE make_fp32(x[15:0]) { - y.fp32 := 0.0 - y[31:16] := x[15:0] - RETURN y -} -dst := src -FOR j := 0 to 7 - dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) - dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE make_fp32(x[15:0]) { - y.fp32 := 0.0 - y[31:16] := x[15:0] - RETURN y -} -dst := src -FOR j := 0 to 7 - IF k[j] - dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) - dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE make_fp32(x[15:0]) { - y.fp32 := 0.0 - y[31:16] := x[15:0] - RETURN y -} -dst := src -FOR j := 0 to 7 - IF k[j] - dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1]) - dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BF16 - AVX512VL -
immintrin.h
- Arithmetic -
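The VL forms follow the same pattern at 128-bit and 256-bit width. A 128-bit sketch (names _mm_cvtneps_pbh and _mm_dpbf16_ps assumed; compile with e.g. -mavx512bf16 -mavx512vl). Because the single-vector convert zeroes the upper BF16 lanes, only the two low FP32 accumulators receive nonzero contributions here:

#include <immintrin.h>

/* Fold two FP32 quads into BF16 pair-products:
   acc.fp32[0] += a[0]*b[0] + a[1]*b[1]
   acc.fp32[1] += a[2]*b[2] + a[3]*b[3]
   (lanes 2 and 3 add zeros, since the upper BF16 halves are zero) */
__m128 bf16_dot_step128(__m128 acc, __m128 a, __m128 b)
{
    __m128bh abh = _mm_cvtneps_pbh(a);
    __m128bh bbh = _mm_cvtneps_pbh(b);
    return _mm_dpbf16_ps(acc, abh, bbh);
}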
Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from that element at the 8 bit positions selected by the low 6 bits of the 8 corresponding 8-bit elements of "c", and store the resulting 8 bits in the corresponding byte of mask "dst" using zeromask "k" (bits are zeroed out when the corresponding mask bit is not set).
FOR i := 0 to 3 //Qword
	FOR j := 0 to 7 // Byte
		IF k[i*8+j]
			m := c.qword[i].byte[j] & 0x3F
			dst[i*8+j] := b.qword[i].bit[m]
		ELSE
			dst[i*8+j] := 0
		FI
	ENDFOR
ENDFOR
dst[MAX:32] := 0
AVX512_BITALG
AVX512VL
immintrin.h
- Bit Manipulation -
Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from that element at the 8 bit positions selected by the low 6 bits of the 8 corresponding 8-bit elements of "c", and store the resulting 8 bits in the corresponding byte of mask "dst".
FOR i := 0 to 3 //Qword
	FOR j := 0 to 7 // Byte
		m := c.qword[i].byte[j] & 0x3F
		dst[i*8+j] := b.qword[i].bit[m]
	ENDFOR
ENDFOR
dst[MAX:32] := 0
AVX512_BITALG
AVX512VL
immintrin.h
- Bit Manipulation -
Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from that element at the 8 bit positions selected by the low 6 bits of the 8 corresponding 8-bit elements of "c", and store the resulting 8 bits in the corresponding byte of mask "dst" using zeromask "k" (bits are zeroed out when the corresponding mask bit is not set).
FOR i := 0 to 1 //Qword
	FOR j := 0 to 7 // Byte
		IF k[i*8+j]
			m := c.qword[i].byte[j] & 0x3F
			dst[i*8+j] := b.qword[i].bit[m]
		ELSE
			dst[i*8+j] := 0
		FI
	ENDFOR
ENDFOR
dst[MAX:16] := 0
AVX512_BITALG
AVX512VL
immintrin.h
- Bit Manipulation -
Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from that element at the 8 bit positions selected by the low 6 bits of the 8 corresponding 8-bit elements of "c", and store the resulting 8 bits in the corresponding byte of mask "dst".
FOR i := 0 to 1 //Qword
	FOR j := 0 to 7 // Byte
		m := c.qword[i].byte[j] & 0x3F
		dst[i*8+j] := b.qword[i].bit[m]
	ENDFOR
ENDFOR
dst[MAX:16] := 0
AVX512_BITALG
AVX512VL
immintrin.h
- Bit Manipulation -
- - - - Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := POPCNT(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := POPCNT(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := POPCNT(a[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := POPCNT(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := POPCNT(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := POPCNT(a[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 31 - i := j*8 - dst[i+7:i] := POPCNT(a[i+7:i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := POPCNT(a[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := POPCNT(a[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := POPCNT(a[i+7:i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := POPCNT(a[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := POPCNT(a[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_BITALG - AVX512VL -
immintrin.h
- Bit Manipulation -
Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from that element at the 8 bit positions selected by the low 6 bits of the 8 corresponding 8-bit elements of "c", and store the resulting 8 bits in the corresponding byte of mask "dst" using zeromask "k" (bits are zeroed out when the corresponding mask bit is not set).
FOR i := 0 to 7 //Qword
	FOR j := 0 to 7 // Byte
		IF k[i*8+j]
			m := c.qword[i].byte[j] & 0x3F
			dst[i*8+j] := b.qword[i].bit[m]
		ELSE
			dst[i*8+j] := 0
		FI
	ENDFOR
ENDFOR
dst[MAX:64] := 0
AVX512_BITALG
immintrin.h
- Bit Manipulation -
Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from that element at the 8 bit positions selected by the low 6 bits of the 8 corresponding 8-bit elements of "c", and store the resulting 8 bits in the corresponding byte of mask "dst".
FOR i := 0 to 7 //Qword
	FOR j := 0 to 7 // Byte
		m := c.qword[i].byte[j] & 0x3F
		dst[i*8+j] := b.qword[i].bit[m]
	ENDFOR
ENDFOR
dst[MAX:64] := 0
AVX512_BITALG
immintrin.h
- Bit Manipulation -
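A sketch of the 512-bit bit-gather above (name _mm512_bitshuffle_epi64_mask assumed; the result is a 64-bit mask, one bit per selector byte). With selectors 0, 8, 16, ..., 56 it packs the least-significant bit of every byte of the source into the mask:

#include <immintrin.h>
#include <stdint.h>

/* Extract the LSB of each of the 64 bytes of v into one uint64_t.
   Each selector byte indexes a bit (0..63) within its own qword. */
uint64_t lsb_of_each_byte(__m512i v)
{
    const __m512i sel = _mm512_set1_epi64(0x3830282018100800LL);
    __mmask64 m = _mm512_bitshuffle_epi64_mask(v, sel);
    return (uint64_t)m;
}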
- - - - Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := POPCNT(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BITALG -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := POPCNT(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BITALG -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := POPCNT(a[i+15:i]) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BITALG -
immintrin.h
- Bit Manipulation -
- - - - Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst". - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 63 - i := j*8 - dst[i+7:i] := POPCNT(a[i+7:i]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BITALG -
immintrin.h
- Bit Manipulation -
- - - - - - Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := POPCNT(a[i+7:i]) - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BITALG -
immintrin.h
- Bit Manipulation -
- - - - - Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE POPCNT(a) { - count := 0 - DO WHILE a > 0 - count += a[0] - a >>= 1 - OD - RETURN count -} -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := POPCNT(a[i+7:i]) - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_BITALG -
immintrin.h
- Bit Manipulation -
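The byte-granular counts above combine well with the classic SAD reduction: summing each group of eight byte counts against zero yields per-qword totals. A sketch (names _mm512_popcnt_epi8 from AVX512_BITALG and _mm512_sad_epu8 from AVX512BW assumed):

#include <immintrin.h>
#include <stdint.h>

/* Population count of one 64-byte block. */
uint64_t popcount_block64(const void *p)
{
    __m512i v  = _mm512_loadu_si512(p);
    __m512i bc = _mm512_popcnt_epi8(v);                       /* 64 byte counts */
    __m512i qs = _mm512_sad_epu8(bc, _mm512_setzero_si512()); /* 8 qword sums   */
    return (uint64_t)_mm512_reduce_add_epi64(qs);
}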
Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians.
Trigonometry
FOR j := 0 to 15
	i := j*16
	dst[i+15:i] := ACOS(a[i+15:i])
ENDFOR
dst[MAX:256] := 0
immintrin.h
AVX512_FP16
Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".
Trigonometry
FOR j := 0 to 15
	i := j*16
	dst[i+15:i] := ACOSH(a[i+15:i])
ENDFOR
dst[MAX:256] := 0
immintrin.h
AVX512_FP16
Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians.
Trigonometry
FOR j := 0 to 15
	i := j*16
	dst[i+15:i] := ASIN(a[i+15:i])
ENDFOR
dst[MAX:256] := 0
immintrin.h
AVX512_FP16
Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".
Trigonometry
FOR j := 0 to 15
	i := j*16
	dst[i+15:i] := ASINH(a[i+15:i])
ENDFOR
dst[MAX:256] := 0
immintrin.h
AVX512_FP16
- - - Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. - - - Trigonometry -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := ATAN2(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians.
Trigonometry
FOR j := 0 to 15
	i := j*16
	dst[i+15:i] := ATAN(a[i+15:i])
ENDFOR
dst[MAX:256] := 0
immintrin.h
AVX512_FP16
Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst".
Trigonometry
FOR j := 0 to 15
	i := j*16
	dst[i+15:i] := ATANH(a[i+15:i])
ENDFOR
dst[MAX:256] := 0
immintrin.h
AVX512_FP16
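The trigonometric and other elementary-function entries in this group are SVML-style library sequences rather than single instructions; they are only available where the compiler supplies a short-vector math library (the Intel compilers do; GCC and Clang generally do not expose these names). A sketch, with the name _mm256_atan2_ph and the AVX512-FP16 __m256h type assumed:

#include <immintrin.h>

/* Per-lane atan2 over 16 half-precision (y, x) pairs; results are
   angles in radians, as the entry above specifies. */
__m256h to_angles(__m256h y, __m256h x)
{
    return _mm256_atan2_ph(y, x); /* compiler-provided SVML sequence */
}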
- - - Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math FunctionsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := CubeRoot(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := CDFNormal(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := InverseCDFNormal(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := COS(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - - TrigonometryFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := COSD(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := COSH(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := ERF(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := 1.0 - ERF(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := 1.0 / (1.0 - ERF(a[i+15:i])) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := 1.0 / ERF(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := POW(FP16(e), a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
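Among the exponential entries above, the exp(x)-1 form exists for accuracy, not convenience: near zero, POW(e, x) rounds to 1.0 in half precision and an explicit subtraction would cancel to 0. A sketch (SVML name _mm256_expm1_ph assumed):

#include <immintrin.h>

/* e^x - 1 with full relative accuracy for small |x|, where the
   naive exp-then-subtract loses all significant bits. */
__m256h expm1_lanes(__m256h x)
{
    return _mm256_expm1_ph(x);
}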
Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst".
Elementary Math Functions
FOR j := 0 to 15
	i := j*16
	dst[i+15:i] := SQRT(POW(a[i+15:i], 2.0) + POW(b[i+15:i], 2.0))
ENDFOR
dst[MAX:256] := 0
immintrin.h
AVX512_FP16
- - - Compute the inverse cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math FunctionsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := InvCubeRoot(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math FunctionsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := InvSQRT(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := LOG(1.0 + a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := LOG(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
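The base-10 and base-2 entries are specified above as LOG(x)/LOG(10) and LOG(x)/LOG(2), so they inherit the natural log's behaviour at zero and at negative inputs. A usage sketch (SVML name _mm256_log10_ph and the AVX512-FP16 arithmetic intrinsics assumed; compile with e.g. -mavx512fp16 -mavx512vl):

#include <immintrin.h>

/* Decibels from 16 power ratios at once: dB = 10 * log10(ratio). */
__m256h to_decibels(__m256h ratio)
{
    const __m256h ten = _mm256_set1_ph((_Float16)10.0f);
    return _mm256_mul_ph(ten, _mm256_log10_ph(ratio));
}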
- - - Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - - Elementary Math FunctionsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := ConvertExpFP16(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of packed half-precision (16-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". - - - Elementary Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := POW(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := SIN(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr".
Trigonometry
FOR j := 0 to 15
	i := j*16
	dst[i+15:i] := SIN(a[i+15:i])
	MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i])
ENDFOR
dst[MAX:256] := 0
immintrin.h
AVX512_FP16
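The combined entry above shares one argument reduction between the two results, so it is cheaper than separate SIN and COS calls; per the description, the cosine comes back through the pointer and the sine as the return value (exact signature assumed). A rotation sketch:

#include <immintrin.h>

/* Rotate 16 2-D points by per-lane angles theta (radians). */
void rotate_points(__m256h theta, __m256h x, __m256h y,
                   __m256h *x_out, __m256h *y_out)
{
    __m256h c; /* cosine is stored through the pointer argument */
    __m256h s = _mm256_sincos_ph(&c, theta);
    *x_out = _mm256_sub_ph(_mm256_mul_ph(c, x), _mm256_mul_ph(s, y));
    *y_out = _mm256_add_ph(_mm256_mul_ph(s, x), _mm256_mul_ph(c, y));
}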
- - - Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - - TrigonometryFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := SIND(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := SINH(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst". - - Special Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := CEIL(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst". - - Special Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := FLOOR(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst". - - Special Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := ROUND(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm256_sqrt_ph". - - Elementary Math Functions -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := SQRT(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := TAN(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - - TrigonometryFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := TAND(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := TANH(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst". - - Special Math FunctionsFOR j := 0 to 15 - i := j*16 - dst[i+15:i] := TRUNCATE(a[i+15:i]) -ENDFOR -dst[MAX:256] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ACOS(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ACOSH(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ASIN(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ASINH(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ATAN2(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ATAN(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ATANH(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math FunctionsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := CubeRoot(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := CDFNormal(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := InverseCDFNormal(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst". - - Special Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := CEIL(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := COS(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - - TrigonometryFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := COSD(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := COSH(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ERF(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := 1.0 - ERF(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
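The error-function entries above differ only in the final step: the complementary form subtracts the erf result from one. A one-lane sketch, with the same float-for-fp16 assumption and illustrative names:

#include <math.h>

static float erf_lane_ref(float x)  { return erff(x); }
static float erfc_lane_ref(float x) { return 1.0f - erff(x); }   /* 1.0 - ERF(x) */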
- - - Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := 1.0 / (1.0 - ERF(a[i+15:i])) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := 1.0 / ERF(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := POW(FP16(e), a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
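The four exponential entries above all follow the POW/subtract pattern of their pseudocode. A hedged one-lane sketch, float standing in for the fp16 elements:

#include <math.h>

static float exp10_lane_ref(float x) { return powf(10.0f, x); }
static float exp2_lane_ref(float x)  { return powf(2.0f, x); }
static float exp_lane_ref(float x)   { return expf(x); }
static float expm1_lane_ref(float x) { return expf(x) - 1.0f; }  /* POW(e, x) - 1.0 */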
- - - Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst". - - Special Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := FLOOR(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := SQRT(POW(a[i+15:i], 2.0) + POW(b[i+15:i], 2.0)) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math FunctionsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := InvSQRT(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := LOG(1.0 + a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := LOG(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - - Elementary Math FunctionsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ConvertExpFP16(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
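Every entry from here on adds a writemask "k" and a fallback source "src": lane j takes the computed value when bit j of "k" is set and is copied from "src" otherwise. A minimal sketch of that merge pattern for the 32-lane case (the names and the float element type are illustrative):

#include <stdint.h>

static void mask_merge32_ref(float dst[32], uint32_t k,
                             const float src[32], const float res[32]) {
    for (int j = 0; j < 32; j++)
        dst[j] = ((k >> j) & 1) ? res[j] : src[j];
}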
- - - Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians, using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ACOS(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians, using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ACOSH(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians, using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ASIN(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians, using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ASINH(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians, using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ATAN(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians, using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ATANH(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math FunctionsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := CubeRoot(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := CDFNormal(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := InverseCDFNormal(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Special Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := CEIL(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := COS(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - TrigonometryFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := COSD(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := COSH(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ERF(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := 1.0 - ERF(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := 1.0 / (1.0 - ERF(a[i+15:i])) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Probability/StatisticsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := 1.0 / ERF(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := POW(FP16(e), a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Special Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := FLOOR(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math FunctionsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := InvSQRT(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := LOG(1.0 + a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := LOG(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. - - - - Elementary Math FunctionsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ConvertExpFP16(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Rounds each packed half-precision (16-bit) floating-point element in "a" to the nearest integer value and stores the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Special Math FunctionsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := NearbyInt(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Computes the reciprocal of packed half-precision (16-bit) floating-point elements in "a", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := (1.0 / a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Rounds the packed half-precision (16-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Special Math FunctionsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := RoundToNearestEven(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := SIN(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". Elements are written to their respective locations using writemask "k" (elements are copied from "sin_src" or "cos_src" when the corresponding mask bit is not set). - - - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := SIN(a[i+15:i]) - MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i]) - ELSE - dst[i+15:i] := sin_src[i+15:i] - MEM[mem_addr+i+15:mem_addr+i] := cos_src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -cos_res[MAX:512] := 0 -
immintrin.h
AVX512_FP16
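The masked sincos above is the one entry with two fallback sources, since it writes two results. A sketch of that semantics under the same float-for-fp16 assumption, with illustrative names:

#include <math.h>
#include <stdint.h>

static void mask_sincos32_ref(float dst[32], float *mem_addr, uint32_t k,
                              const float sin_src[32], const float cos_src[32],
                              const float a[32]) {
    for (int j = 0; j < 32; j++) {
        if ((k >> j) & 1) {
            dst[j]      = sinf(a[j]);
            mem_addr[j] = cosf(a[j]);
        } else {                      /* masked-off lanes keep their sources */
            dst[j]      = sin_src[j];
            mem_addr[j] = cos_src[j];
        }
    }
}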
- - - Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - TrigonometryFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := SIND(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := SINH(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Special Math Functions -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ROUND(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := TAN(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - TrigonometryFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := TAND(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := TANH(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - - - - Special Math FunctionsFOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := TRUNCATE(a[i+15:i]) - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Rounds each packed half-precision (16-bit) floating-point element in "a" to the nearest integer value and stores the results as packed half-precision floating-point elements in "dst". - - Special Math FunctionsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := NearbyInt(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of packed half-precision (16-bit) floating-point elements in "a" raised to the power of packed elements in "b", and store the results in "dst". - - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := POW(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Computes the reciprocal of packed half-precision (16-bit) floating-point elements in "a", storing the results in "dst". - - Elementary Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := (1.0 / a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Rounds the packed half-precision (16-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst". - - Special Math FunctionsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := RoundToNearestEven(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := SIN(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". - - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := SIN(a[i+15:i]) - MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -cos_res[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - - TrigonometryFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := SIND(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := SINH(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst". - - Special Math Functions -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ROUND(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := TAN(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - - TrigonometryFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := TAND(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := TANH(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst". - - Special Math FunctionsFOR j := 0 to 31 - i := j*16 - dst[i+15:i] := TRUNCATE(a[i+15:i]) -ENDFOR -dst[MAX:512] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse cosine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ACOS(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ACOSH(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse sine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ASIN(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ASINH(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. - - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ATAN2(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ATAN(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" expressed in radians. - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ATANH(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math FunctionsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := CubeRoot(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := CDFNormal(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse cumulative distribution function of packed half-precision (16-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := InverseCDFNormal(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := COS(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - - TrigonometryFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := COSD(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := COSH(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ERF(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := 1.0 - ERF(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse complementary error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := 1.0 / (1.0 - ERF(a[i+15:i])) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse error function of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Probability/StatisticsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := 1.0 / ERF(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of 10 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := POW(FP16(10.0), a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of 2 raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := POW(FP16(2.0), a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := POW(FP16(e), a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of "e" raised to the power of packed half-precision (16-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := POW(FP16(e), a[i+15:i]) - 1.0 -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". - - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := SQRT(POW(a[i+15:i], 2.0) + POW(b[i+15:i], 2.0)) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
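The hypotenuse entry above computes SQRT(a^2 + b^2) literally; note the pseudocode takes no precautions against intermediate overflow (a robust version would look like C's hypotf). One lane, float standing in for fp16:

#include <math.h>

static float hypot_lane_ref(float a, float b) {
    return sqrtf(a * a + b * b);   /* exactly as the pseudocode states */
}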
- - - Compute the inverse cube root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math FunctionsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := InvCubeRoot(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the inverse square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math FunctionsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := InvSQRT(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the base-10 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := LOG(a[i+15:i]) / LOG(10.0) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the natural logarithm of one plus packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := LOG(1.0 + a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the base-2 logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := LOG(a[i+15:i]) / LOG(2.0) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the natural logarithm of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := LOG(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - - Elementary Math FunctionsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ConvertExpFP16(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the exponential value of packed half-precision (16-bit) floating-point elements in "a" raised to the power of packed elements in "b", and store the results in "dst". - - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := POW(a[i+15:i], b[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := SIN(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the sine and cosine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". - - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := SIN(a[i+15:i]) - MEM[mem_addr+i+15:mem_addr+i] := COS(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -cos_res[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the sine of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - - TrigonometryFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := SIND(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic sine of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := SINH(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" up to an integer value, and store the results as packed half-precision floating-point elements in "dst". - - Special Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := CEIL(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" down to an integer value, and store the results as packed half-precision floating-point elements in "dst". - - Special Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := FLOOR(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Round the packed half-precision (16-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed half-precision floating-point elements in "dst". - - Special Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ROUND(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_ph". - - Elementary Math Functions -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := SQRT(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := TAN(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - - TrigonometryFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := TAND(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Compute the hyperbolic tangent of packed half-precision (16-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - - Trigonometry -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := TANH(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - Truncate the packed half-precision (16-bit) floating-point elements in "a", and store the results as packed half-precision floating-point elements in "dst". - - Special Math FunctionsFOR j := 0 to 7 - i := j*16 - dst[i+15:i] := TRUNCATE(a[i+15:i]) -ENDFOR -dst[MAX:128] := 0 -
immintrin.h
AVX512_FP16
- - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 TO 7 - dst.fp16[j] := a.fp16[j] + b.fp16[j] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := a.fp16[j] + b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := a.fp16[j] + b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
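The add entries above show the two masking flavours used throughout this arithmetic block: the writemask form falls back to "src", the zeromask form falls back to zero. Side by side for the 8-lane case (float-for-fp16 sketch, illustrative names):

#include <stdint.h>

static void mask_add8_ref(float dst[8], const float src[8], uint8_t k,
                          const float a[8], const float b[8]) {
    for (int j = 0; j < 8; j++)
        dst[j] = ((k >> j) & 1) ? a[j] + b[j] : src[j];   /* writemask */
}

static void maskz_add8_ref(float dst[8], uint8_t k,
                           const float a[8], const float b[8]) {
    for (int j = 0; j < 8; j++)
        dst[j] = ((k >> j) & 1) ? a[j] + b[j] : 0.0f;     /* zeromask */
}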
- - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 TO 15 - dst.fp16[j] := a.fp16[j] + b.fp16[j] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.fp16[j] := a.fp16[j] + b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.fp16[j] := a.fp16[j] + b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 7 - dst.fp16[j] := a.fp16[j] / b.fp16[j] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := a.fp16[j] / b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := a.fp16[j] / b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 15 - dst.fp16[j] := a.fp16[j] / b.fp16[j] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := a.fp16[j] / b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := a.fp16[j] / b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 7 - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
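The three masked fused multiply-add variants above differ only in where a masked-off lane comes from: "a", "c", or zero. A compact sketch covering all three fallbacks (float-for-fp16 assumption; the single rounding of a fused hardware operation is not modelled, matching the pseudocode's (a*b)+c):

#include <stdint.h>

/* fallback: 0 -> copy from a, 1 -> copy from c, 2 -> zero */
static void mask_fmadd8_ref(float dst[8], uint8_t k, const float a[8],
                            const float b[8], const float c[8], int fallback) {
    for (int j = 0; j < 8; j++) {
        if ((k >> j) & 1)
            dst[j] = a[j] * b[j] + c[j];          /* (a*b)+c per the pseudocode */
        else
            dst[j] = (fallback == 0) ? a[j] : (fallback == 1) ? c[j] : 0.0f;
    }
}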
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 15 - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 7 - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 15 - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - -FOR j := 0 to 7 - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
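The unmasked 128-bit multiply-subtract above presumably corresponds to _mm_fmsub_ph (name not shown in this dump); a minimal sketch:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h a = _mm_set1_ph((_Float16)2.0);
    __m128h b = _mm_set1_ph((_Float16)3.0);
    __m128h c = _mm_set1_ph((_Float16)1.0);
    __m128h r = _mm_fmsub_ph(a, b, c); /* each lane: (2*3) - 1 = 5 */
    printf("%f\n", (float)_mm_cvtsh_h(r)); /* 5.000000 */
    return 0;
}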
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - -FOR j := 0 to 15 - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - -FOR j := 0 to 7 - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
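For the negated multiply-subtract form above (presumably _mm_fnmsub_ph; names are stripped here), a minimal sketch:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h a = _mm_set1_ph((_Float16)2.0);
    __m128h b = _mm_set1_ph((_Float16)3.0);
    __m128h c = _mm_set1_ph((_Float16)1.0);
    __m128h r = _mm_fnmsub_ph(a, b, c); /* each lane: -(2*3) - 1 = -7 */
    printf("%f\n", (float)_mm_cvtsh_h(r)); /* -7.000000 */
    return 0;
}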
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - -FOR j := 0 to 15 - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". - -FOR j := 0 to 7 - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
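A sketch of the alternating add/subtract form above, assuming it is _mm_fmaddsub_ph (the name is not shown in this entry); even-indexed lanes subtract c, odd-indexed lanes add it:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h a = _mm_set1_ph((_Float16)2.0);
    __m128h b = _mm_set1_ph((_Float16)3.0);
    __m128h c = _mm_set1_ph((_Float16)1.0);
    __m128h r = _mm_fmaddsub_ph(a, b, c); /* even lanes (2*3)-1=5, odd lanes (2*3)+1=7 */
    _Float16 out[8];
    _mm_storeu_ph(out, r);
    printf("%f %f\n", (float)out[0], (float)out[1]); /* 5.000000 7.000000 */
    return 0;
}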
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - FI - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - FI - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - FI - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". - -FOR j := 0 to 15 - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - FI - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - FI - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - FI - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst". - -FOR j := 0 to 7 - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
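The mirror-image alternating form above (presumably _mm_fmsubadd_ph, assuming the stripped name) adds c in even-indexed lanes and subtracts it in odd-indexed lanes:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h a = _mm_set1_ph((_Float16)2.0);
    __m128h b = _mm_set1_ph((_Float16)3.0);
    __m128h c = _mm_set1_ph((_Float16)1.0);
    __m128h r = _mm_fmsubadd_ph(a, b, c); /* even lanes (2*3)+1=7, odd lanes (2*3)-1=5 */
    _Float16 out[8];
    _mm_storeu_ph(out, r);
    printf("%f %f\n", (float)out[0], (float)out[1]); /* 7.000000 5.000000 */
    return 0;
}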
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - FI - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - FI - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - FI - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst". - -FOR j := 0 to 15 - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - FI - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - FI - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - IF ((j & 1) == 0) - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - FI - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 TO 7 - dst.fp16[j] := a.fp16[j] - b.fp16[j] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := a.fp16[j] - b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
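The merge-masked subtraction above (presumably _mm_mask_sub_ph; this dump omits names) illustrates the writemask pattern used throughout this section: masked-off lanes pass through src unchanged.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h src = _mm_set1_ph((_Float16)99.0);
    __m128h a   = _mm_set1_ph((_Float16)5.0);
    __m128h b   = _mm_set1_ph((_Float16)2.0);
    /* mask 0x0F: lanes 0..3 compute 5-2=3, lanes 4..7 keep src (99) */
    __m128h r = _mm_mask_sub_ph(src, (__mmask8)0x0F, a, b);
    _Float16 out[8];
    _mm_storeu_ph(out, r);
    printf("%f %f\n", (float)out[0], (float)out[7]); /* 3.000000 99.000000 */
    return 0;
}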
- - - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := a.fp16[j] - b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 TO 15 - dst.fp16[j] := a.fp16[j] - b.fp16[j] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.fp16[j] := a.fp16[j] - b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.fp16[j] := a.fp16[j] - b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR i := 0 TO 7 - dst.fp16[i] := a.fp16[i] * b.fp16[i] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
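A minimal sketch of the unmasked 128-bit multiply above, assuming it is _mm_mul_ph:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h a = _mm_set1_ph((_Float16)2.0);
    __m128h b = _mm_set1_ph((_Float16)3.0);
    __m128h r = _mm_mul_ph(a, b); /* each lane: 2*3 = 6 */
    printf("%f\n", (float)_mm_cvtsh_h(r)); /* 6.000000 */
    return 0;
}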
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR i := 0 TO 7 - IF k[i] - dst.fp16[i] := a.fp16[i] * b.fp16[i] - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR i := 0 TO 7 - IF k[i] - dst.fp16[i] := a.fp16[i] * b.fp16[i] - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR i := 0 TO 15 - dst.fp16[i] := a.fp16[i] * b.fp16[i] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR i := 0 TO 15 - IF k[i] - dst.fp16[i] := a.fp16[i] * b.fp16[i] - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR i := 0 TO 15 - IF k[i] - dst.fp16[i] := a.fp16[i] * b.fp16[i] - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 3 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
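A worked C sketch of the complex multiply above, assuming it is _mm_mul_pch (the adjacent identical entry is presumably its alias _mm_fmul_pch; neither name appears in this dump). Each 32-bit pair of FP16 lanes holds one complex number as {re, im}:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* _mm_set_ph takes lanes high-to-low, so complex 0 = 1+2i, complex 1..3 = 0 */
    __m128h a = _mm_set_ph(0, 0, 0, 0, 0, 0, (_Float16)2.0, (_Float16)1.0);
    __m128h b = _mm_set_ph(0, 0, 0, 0, 0, 0, (_Float16)4.0, (_Float16)3.0);
    __m128h r = _mm_mul_pch(a, b); /* (1+2i)(3+4i) = (3-8) + (6+4)i = -5+10i */
    _Float16 out[8];
    _mm_storeu_ph(out, r);
    printf("%f %+fi\n", (float)out[0], (float)out[1]); /* -5.000000 +10.000000i */
    return 0;
}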
- - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 3 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := src.fp16[2*i+0] - dst.fp16[2*i+1] := src.fp16[2*i+1] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := src.fp16[2*i+0] - dst.fp16[2*i+1] := src.fp16[2*i+1] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 7 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 7 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := src.fp16[2*i+0] - dst.fp16[2*i+1] := src.fp16[2*i+1] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := src.fp16[2*i+0] - dst.fp16[2*i+1] := src.fp16[2*i+1] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 3 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
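The conjugate variant above multiplies a by conj(b); assuming the stripped name is _mm_fcmul_pch (with _mm_cmul_pch presumably the identical neighboring entry), a sketch:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h a = _mm_set_ph(0, 0, 0, 0, 0, 0, (_Float16)2.0, (_Float16)1.0); /* 1+2i */
    __m128h b = _mm_set_ph(0, 0, 0, 0, 0, 0, (_Float16)4.0, (_Float16)3.0); /* 3+4i */
    __m128h r = _mm_fcmul_pch(a, b); /* (1+2i)*conj(3+4i) = (3+8) + (6-4)i = 11+2i */
    _Float16 out[8];
    _mm_storeu_ph(out, r);
    printf("%f %+fi\n", (float)out[0], (float)out[1]); /* 11.000000 +2.000000i */
    return 0;
}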
- - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 3 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := src.fp16[2*i+0] - dst.fp16[2*i+1] := src.fp16[2*i+1] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := src.fp16[2*i+0] - dst.fp16[2*i+1] := src.fp16[2*i+1] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 7 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 7 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := src.fp16[2*i+0] - dst.fp16[2*i+1] := src.fp16[2*i+1] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := src.fp16[2*i+0] - dst.fp16[2*i+1] := src.fp16[2*i+1] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 3 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
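A sketch of the complex multiply-accumulate above, assuming the stripped name is _mm_fmadd_pch (dst = a*b + c per complex pair):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h a = _mm_set_ph(0, 0, 0, 0, 0, 0, (_Float16)2.0, (_Float16)1.0); /* 1+2i */
    __m128h b = _mm_set_ph(0, 0, 0, 0, 0, 0, (_Float16)4.0, (_Float16)3.0); /* 3+4i */
    __m128h c = _mm_set_ph(0, 0, 0, 0, 0, 0, (_Float16)1.0, (_Float16)1.0); /* 1+1i */
    __m128h r = _mm_fmadd_pch(a, b, c); /* (1+2i)(3+4i) + (1+i) = -4+11i */
    _Float16 out[8];
    _mm_storeu_ph(out, r);
    printf("%f %+fi\n", (float)out[0], (float)out[1]); /* -4.000000 +11.000000i */
    return 0;
}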
- - - - - - - Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := a.fp16[2*i+0] - dst.fp16[2*i+1] := a.fp16[2*i+1] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := c.fp16[2*i+0] - dst.fp16[2*i+1] := c.fp16[2*i+1] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 7 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := a.fp16[2*i+0] - dst.fp16[2*i+1] := a.fp16[2*i+1] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := c.fp16[2*i+0] - dst.fp16[2*i+1] := c.fp16[2*i+1] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 3 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
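And the conjugate multiply-accumulate above (presumably _mm_fcmadd_pch; dst = a*conj(b) + c per complex pair), sketched the same way:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h a = _mm_set_ph(0, 0, 0, 0, 0, 0, (_Float16)2.0, (_Float16)1.0); /* 1+2i */
    __m128h b = _mm_set_ph(0, 0, 0, 0, 0, 0, (_Float16)4.0, (_Float16)3.0); /* 3+4i */
    __m128h c = _mm_set_ph(0, 0, 0, 0, 0, 0, (_Float16)1.0, (_Float16)1.0); /* 1+1i */
    __m128h r = _mm_fcmadd_pch(a, b, c); /* (1+2i)*conj(3+4i) + (1+i) = 12+3i */
    _Float16 out[8];
    _mm_storeu_ph(out, r);
    printf("%f %+fi\n", (float)out[0], (float)out[1]); /* 12.000000 +3.000000i */
    return 0;
}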
- - - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := a.fp16[2*i+0] - dst.fp16[2*i+1] := a.fp16[2*i+1] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := c.fp16[2*i+0] - dst.fp16[2*i+1] := c.fp16[2*i+1] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 3 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 7 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := a.fp16[2*i+0] - dst.fp16[2*i+1] := a.fp16[2*i+1] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := c.fp16[2*i+0] - dst.fp16[2*i+1] := c.fp16[2*i+1] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR i := 0 to 7 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0] - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1] - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". - -tmp := a -FOR i := 0 to 7 - tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+8] -ENDFOR -FOR i := 0 to 3 - tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+4] -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+2] -ENDFOR -dst.fp16[0] := tmp.fp16[0] + tmp.fp16[1] - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
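The tree reduction above folds the vector in halves; assuming the 256-bit add form is _mm256_reduce_add_ph (names are not shown in this dump), a minimal sketch:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256h a = _mm256_set1_ph((_Float16)1.5);
    _Float16 s = _mm256_reduce_add_ph(a); /* 16 lanes * 1.5 = 24 */
    printf("%f\n", (float)s); /* 24.000000 */
    return 0;
}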
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". - -tmp := a -FOR i := 0 to 7 - tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+8] -ENDFOR -FOR i := 0 to 3 - tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+4] -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+2] -ENDFOR -dst.fp16[0] := tmp.fp16[0] * tmp.fp16[1] - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". - -tmp := a -FOR i := 0 to 7 - tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8]) -ENDFOR -FOR i := 0 to 3 - tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) -ENDFOR -dst.fp16[0] := (tmp.fp16[0] > tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". - -tmp := a -FOR i := 0 to 7 - tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8]) -ENDFOR -FOR i := 0 to 3 - tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) -ENDFOR -dst.fp16[0] := (tmp.fp16[0] < tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". - -tmp := a -FOR i := 0 to 3 - tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+4] -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+2] -ENDFOR -dst.fp16[0] := tmp.fp16[0] + tmp.fp16[1] - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". - -tmp := a -FOR i := 0 to 3 - tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+4] -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+2] -ENDFOR -dst.fp16[0] := tmp.fp16[0] * tmp.fp16[1] - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". - -tmp := a -FOR i := 0 to 3 - tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) -ENDFOR -dst.fp16[0] := (tmp.fp16[0] > tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". - -tmp := a -FOR i := 0 to 3 - tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) -ENDFOR -dst.fp16[0] := (tmp.fp16[0] < tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Finds the absolute value of each packed half-precision (16-bit) floating-point element in "v2", storing the results in "dst". - -FOR j := 0 to 15 - dst.fp16[j] := ABS(v2.fp16[j]) -ENDFOR -dst[MAX:256] := 0 - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
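A minimal sketch of the 256-bit absolute-value entry above, assuming it is _mm256_abs_ph:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256h v = _mm256_set1_ph((_Float16)-2.5);
    __m256h r = _mm256_abs_ph(v); /* each lane: |-2.5| = 2.5 */
    _Float16 out[16];
    _mm256_storeu_ph(out, r);
    printf("%f\n", (float)out[0]); /* 2.500000 */
    return 0;
}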
- - - - Finds the absolute value of each packed half-precision (16-bit) floating-point element in "v2", storing the results in "dst". - -FOR j := 0 to 7 - dst.fp16[j] := ABS(v2.fp16[j]) -ENDFOR -dst[MAX:128] := 0 - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - Compute the complex conjugates of complex numbers in "a", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) -ENDFOR -dst[MAX:256] := 0 - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
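As the pseudocode above shows, conjugation is just an XOR of -0.0 against each 32-bit {re, im} pair's imaginary half, i.e. a sign flip of every odd FP16 lane. A sketch assuming the stripped name is _mm256_conj_pch:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    _Float16 in[16], out[16];
    for (int i = 0; i < 16; i++)
        in[i] = (_Float16)(i & 1 ? 2.0 : 1.0); /* every complex pair = 1+2i */
    __m256h a = _mm256_loadu_ph(in);
    __m256h r = _mm256_conj_pch(a); /* flips each imaginary lane: 1+2i -> 1-2i */
    _mm256_storeu_ph(out, r);
    printf("%f %+fi\n", (float)out[0], (float)out[1]); /* 1.000000 -2.000000i */
    return 0;
}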
- - - - Compute the complex conjugates of complex numbers in "a", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) -ENDFOR -dst[MAX:128] := 0 - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - AVX512_FP16 - AVX512VL -
immintrin.h
- Arithmetic -
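Note why the pseudocode works on 32-bit lanes: a complex fp16 pair occupies 32 bits with the imaginary part in the upper half, and FP32(-0.0) is 0x80000000, so the XOR flips exactly one bit, the sign of the imaginary part. A sketch assuming these entries are _mm_conj_pch / _mm_mask_conj_pch (names taken from the Intel Intrinsics Guide, not from this dump):

#include <immintrin.h>

/* Assumed names: _mm_conj_pch / _mm_mask_conj_pch. Each 32-bit lane
 * holds one complex number (re in fp16[0], im in fp16[1]); XORing the
 * lane with 0x80000000 flips only the sign of im. */
static inline __m128h conj4(__m128h a) {
    return _mm_conj_pch(a);
}

static inline __m128h conj4_masked(__m128h src, __mmask8 k, __m128h a) {
    return _mm_mask_conj_pch(src, k, a); /* src lanes kept where k bit is 0 */
}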
Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". Provided in 128-bit (8 elements) and 256-bit (16 elements) forms; each form also comes with a zeromask "k1" variant (result bits are zeroed out when the corresponding bit of "k1" is not set).

CASE (imm8[4:0]) OF
 0: OP := _CMP_EQ_OQ        16: OP := _CMP_EQ_OS
 1: OP := _CMP_LT_OS        17: OP := _CMP_LT_OQ
 2: OP := _CMP_LE_OS        18: OP := _CMP_LE_OQ
 3: OP := _CMP_UNORD_Q      19: OP := _CMP_UNORD_S
 4: OP := _CMP_NEQ_UQ       20: OP := _CMP_NEQ_US
 5: OP := _CMP_NLT_US       21: OP := _CMP_NLT_UQ
 6: OP := _CMP_NLE_US       22: OP := _CMP_NLE_UQ
 7: OP := _CMP_ORD_Q        23: OP := _CMP_ORD_S
 8: OP := _CMP_EQ_UQ        24: OP := _CMP_EQ_US
 9: OP := _CMP_NGE_US       25: OP := _CMP_NGE_UQ
10: OP := _CMP_NGT_US       26: OP := _CMP_NGT_UQ
11: OP := _CMP_FALSE_OQ     27: OP := _CMP_FALSE_OS
12: OP := _CMP_NEQ_OQ       28: OP := _CMP_NEQ_OS
13: OP := _CMP_GE_OS        29: OP := _CMP_GE_OQ
14: OP := _CMP_GT_OS        30: OP := _CMP_GT_OQ
15: OP := _CMP_TRUE_UQ      31: OP := _CMP_TRUE_US
ESAC
FOR j := 0 to 7 (128-bit) / 0 to 15 (256-bit)
    IF unmasked, or k1[j] is set
        k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0
    ELSE
        k[j] := 0
    FI
ENDFOR
k[MAX:8] := 0 (128-bit) / k[MAX:16] := 0 (256-bit)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Compare.
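The "imm8" predicate must be a compile-time constant; the _CMP_* names in the table above are provided by immintrin.h. A sketch assuming these entries are _mm_cmp_ph_mask and _mm256_mask_cmp_ph_mask:

#include <immintrin.h>

/* Assumed names: _mm_cmp_ph_mask / _mm256_mask_cmp_ph_mask. */
static inline __mmask8 lt8(__m128h a, __m128h b) {
    return _mm_cmp_ph_mask(a, b, _CMP_LT_OS); /* bit j = a.fp16[j] < b.fp16[j] */
}

static inline __mmask16 lt16_under(__mmask16 k1, __m256h a, __m256h b) {
    /* lanes whose k1 bit is clear yield 0 in the result mask */
    return _mm256_mask_cmp_ph_mask(k1, a, b, _CMP_LT_OS);
}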
Convert packed signed 16-bit integers (first six entries) or unsigned 16-bit integers (next six) in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". Provided in 128-bit (8 elements) and 256-bit (16 elements) forms; each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 7 (128-bit) / 0 TO 15 (256-bit)
    IF unmasked, or k[j] is set
        dst.fp16[j] := Convert_Int16_To_FP16(a.word[j])   (signed)
        dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j])  (unsigned)
    ELSE
        dst.fp16[j] := src.fp16[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:128] := 0 (128-bit) / dst[MAX:256] := 0 (256-bit)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
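These are same-width conversions (16-bit integer to 16-bit float), so a full 128- or 256-bit register is produced. A sketch assuming the _mm_cvtepi16_ph / _mm256_mask_cvtepi16_ph names from the Intel Intrinsics Guide:

#include <immintrin.h>

/* Assumed names: _mm_cvtepi16_ph and _mm256_mask_cvtepi16_ph. */
static inline __m128h i16_to_ph(__m128i a) {
    return _mm_cvtepi16_ph(a); /* 8 x i16 -> 8 x fp16 */
}

static inline __m256h i16_to_ph_masked(__m256h src, __mmask16 k, __m256i a) {
    return _mm256_mask_cvtepi16_ph(src, k, a); /* src kept where k bit is 0 */
}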
Convert packed signed 32-bit integers (first six entries) or unsigned 32-bit integers (next six) in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". With a 128-bit source, 4 elements are produced and the upper 64 bits of "dst" are zeroed out; with a 256-bit source, 8 elements are produced. Each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 3 (128-bit source) / 0 TO 7 (256-bit source)
    IF unmasked, or k[j] is set
        dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j])   (signed)
        dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])  (unsigned)
    ELSE
        dst.fp16[j] := src.fp16[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:64] := 0 (128-bit source) / dst[MAX:128] := 0 (256-bit source)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
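Because the destination elements are half the source width, these conversions always return a 128-bit vector: four dwords fill only its low 64 bits (the rest is zeroed, per dst[MAX:64] := 0 above), eight dwords fill it completely. A sketch assuming the _mm_cvtepi32_ph / _mm256_cvtepi32_ph names:

#include <immintrin.h>

/* Assumed names: _mm_cvtepi32_ph / _mm256_cvtepi32_ph. Both return a
 * __m128h; the 4-dword form leaves its upper 64 bits zeroed. */
static inline __m128h i32x4_to_ph(__m128i a) { return _mm_cvtepi32_ph(a); }
static inline __m128h i32x8_to_ph(__m256i a) { return _mm256_cvtepi32_ph(a); }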
Convert packed signed 64-bit integers (first six entries) or unsigned 64-bit integers (next six) in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". With a 128-bit source, 2 elements are produced and the upper 96 bits of "dst" are zeroed out; with a 256-bit source, 4 elements are produced and the upper 64 bits of "dst" are zeroed out. Each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 1 (128-bit source) / 0 TO 3 (256-bit source)
    IF unmasked, or k[j] is set
        dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j])   (signed)
        dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])  (unsigned)
    ELSE
        dst.fp16[j] := src.fp16[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:32] := 0 (128-bit source) / dst[MAX:64] := 0 (256-bit source)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". With a 128-bit source, 2 elements are produced and the upper 96 bits of "dst" are zeroed out; with a 256-bit source, 4 elements are produced and the upper 64 bits of "dst" are zeroed out. Each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 1 (128-bit source) / 0 TO 3 (256-bit source)
    IF unmasked, or k[j] is set
        dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j])
    ELSE
        dst.fp16[j] := src.fp16[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:32] := 0 (128-bit source) / dst[MAX:64] := 0 (256-bit source)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". With a 128-bit source, 4 elements are produced and the upper 64 bits of "dst" are zeroed out; with a 256-bit source, 8 elements are produced. Each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 3 (128-bit source) / 0 to 7 (256-bit source)
    IF unmasked, or k[j] is set
        dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j])
    ELSE
        dst.fp16[j] := src.fp16[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:64] := 0 (128-bit source) / dst[MAX:128] := 0 (256-bit source)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
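For float-to-half conversions the Intel Intrinsics Guide uses _mm256_cvtpd_ph and, for single precision, the x-infixed _mm256_cvtxps_ph (the plain _mm256_cvtps_ph name was already taken by the older F16C intrinsic, which takes a rounding immediate and returns __m128i rather than __m128h). Assuming those names apply to the entries above:

#include <immintrin.h>

/* Assumed names: _mm256_cvtpd_ph / _mm256_cvtxps_ph. Both narrow into
 * a __m128h; the pd form fills only its low 64 bits. */
static inline __m128h f64x4_to_ph(__m256d a) { return _mm256_cvtpd_ph(a); }
static inline __m128h f32x8_to_ph(__m256  a) { return _mm256_cvtxps_ph(a); }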
Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst"; a second set of entries converts with truncation. Provided in 128-bit (4 elements) and 256-bit (8 elements) destination forms; each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 3 (128-bit) / 0 TO 7 (256-bit)
    IF unmasked, or k[j] is set
        dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j])            (plain)
        dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j])   (truncating)
    ELSE
        dst.dword[j] := src.dword[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:128] := 0 (128-bit) / dst[MAX:256] := 0 (256-bit)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
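The truncating entries differ from the plain ones only in rounding: the plain conversion rounds according to the current MXCSR rounding mode, while the _Truncate variant always rounds toward zero. A sketch assuming the _mm256_cvtph_epi32 / _mm256_cvttph_epi32 names (note the extra "t" for truncation):

#include <immintrin.h>

/* Assumed names: _mm256_cvtph_epi32 / _mm256_cvttph_epi32. Both read
 * the 8 fp16 lanes of a __m128h and widen them to 8 x i32. */
static inline __m256i ph_to_i32(__m128h a)       { return _mm256_cvtph_epi32(a); }
static inline __m256i ph_to_i32_trunc(__m128h a) { return _mm256_cvttph_epi32(a); }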
Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst"; a second set of entries converts with truncation. Provided in 128-bit (4 elements) and 256-bit (8 elements) destination forms; each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 3 (128-bit) / 0 TO 7 (256-bit)
    IF unmasked, or k[j] is set
        dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j])            (plain)
        dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j])   (truncating)
    ELSE
        dst.dword[j] := src.dword[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:128] := 0 (128-bit) / dst[MAX:256] := 0 (256-bit)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst"; a second set of entries converts with truncation. Provided in 128-bit (2 elements) and 256-bit (4 elements) destination forms; each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 1 (128-bit) / 0 TO 3 (256-bit)
    IF unmasked, or k[j] is set
        dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j])            (plain)
        dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j])   (truncating)
    ELSE
        dst.qword[j] := src.qword[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:128] := 0 (128-bit) / dst[MAX:256] := 0 (256-bit)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst"; a second set of entries converts with truncation. Provided in 128-bit (2 elements) and 256-bit (4 elements) destination forms; each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 1 (128-bit) / 0 TO 3 (256-bit)
    IF unmasked, or k[j] is set
        dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j])            (plain)
        dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j])   (truncating)
    ELSE
        dst.qword[j] := src.qword[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:128] := 0 (128-bit) / dst[MAX:256] := 0 (256-bit)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst"; a second set of entries converts with truncation. Provided in 128-bit (8 elements) and 256-bit (16 elements) forms; each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 7 (128-bit) / 0 TO 15 (256-bit)
    IF unmasked, or k[j] is set
        dst.word[j] := Convert_FP16_To_Int16(a.fp16[j])            (plain)
        dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j])   (truncating)
    ELSE
        dst.word[j] := src.word[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:128] := 0 (128-bit) / dst[MAX:256] := 0 (256-bit)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
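These are again same-width conversions, so a __m256h yields a full __m256i of 16-bit integers. A sketch assuming the _mm256_cvtph_epi16 / _mm256_cvttph_epi16 names:

#include <immintrin.h>

/* Assumed names: _mm256_cvtph_epi16 / _mm256_cvttph_epi16. The 16 fp16
 * lanes become 16 x i16; the "tt" form truncates toward zero. */
static inline __m256i ph_to_i16(__m256h a)       { return _mm256_cvtph_epi16(a); }
static inline __m256i ph_to_i16_trunc(__m256h a) { return _mm256_cvttph_epi16(a); }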
Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst"; a second set of entries converts with truncation. Provided in 128-bit (8 elements) and 256-bit (16 elements) forms; each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 TO 7 (128-bit) / 0 TO 15 (256-bit)
    IF unmasked, or k[j] is set
        dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j])            (plain)
        dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j])   (truncating)
    ELSE
        dst.word[j] := src.word[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:128] := 0 (128-bit) / dst[MAX:256] := 0 (256-bit)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". Provided in 128-bit (2 elements) and 256-bit (4 elements) destination forms; each form comes unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

FOR j := 0 to 1 (128-bit) / 0 to 3 (256-bit)
    IF unmasked, or k[j] is set
        dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j])
    ELSE
        dst.fp64[j] := src.fp64[j] (writemask) / 0 (zeromask)
    FI
ENDFOR
dst[MAX:128] := 0 (128-bit) / dst[MAX:256] := 0 (256-bit)

CPUID flags: AVX512_FP16, AVX512VL. Header: immintrin.h. Category: Convert.
- - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 3 - dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Convert -
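For the FP16-to-FP32 widening above, the likely intrinsic is _mm_cvtxph_ps (the x-suffixed name distinguishes it from the older F16C _mm_cvtph_ps, which takes an __m128i); a sketch under that assumption:

#include <immintrin.h>

/* Widen a.fp16[0..3] to four fp32 lanes. */
static inline __m128 fp16_low4_to_f32(__m128h a)
{
    return _mm_cvtxph_ps(a);
}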
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) - ELSE - dst.fp32[j] := src.fp32[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) - ELSE - dst.fp32[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Convert -
- - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) - ELSE - dst.fp32[j] := src.fp32[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j]) - ELSE - dst.fp32[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Convert -
- - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] - -FOR j := 0 to 7 - dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
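A sketch of the packed maximum above together with its zeromask form (described in the entries that follow), assuming the usual names _mm_max_ph and _mm_maskz_max_ph:

#include <immintrin.h>

/* Per-lane fp16 maximum; NaN and signed-zero handling follows
   [max_float_note]. */
static inline __m128h max8(__m128h a, __m128h b)
{
    return _mm_max_ph(a, b);
}

/* Same operation, but lanes 4..7 are zeroed because their mask bits
   are clear. */
static inline __m128h max_low4(__m128h a, __m128h b)
{
    return _mm_maskz_max_ph((__mmask8)0x0F, a, b);
}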
- - - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] - -FOR j := 0 to 15 - dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [max_float_note] - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note] - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [max_float_note] - -dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) -ELSE - dst.fp16[0] := src.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] - -dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) -ELSE - dst.fp16[0] := src.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][max_float_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0]) -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] - -FOR j := 0 to 7 - dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
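The minimum family mirrors the maximum family above; a one-liner assuming the name _mm_min_ph:

#include <immintrin.h>

/* Per-lane fp16 minimum; see [min_float_note] for NaN handling. */
static inline __m128h min8(__m128h a, __m128h b)
{
    return _mm_min_ph(a, b);
}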
- - - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] - -FOR j := 0 to 15 - dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [min_float_note] - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note] - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [min_float_note] - -dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) -ELSE - dst.fp16[0] := src.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] - -dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) -ELSE - dst.fp16[0] := src.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". [sae_note][min_float_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0]) -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Special Math Functions -
- - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 7 - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
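Per the pseudocode above, imm8[7:4] selects how many fraction bits survive and imm8[3:0] is the rounding control. A sketch assuming the intrinsic name _mm_roundscale_ph and the standard imm8 encoding:

#include <immintrin.h>

/* 0x40 = keep 4 fraction bits (imm8[7:4] = 4), round to nearest even
   (imm8[3:0] = 0): quantizes each lane to a multiple of 1/16. */
static inline __m128h quantize_sixteenths(__m128h a)
{
    return _mm_roundscale_ph(a, 0x40);
}

/* imm8 = 0 keeps no fraction bits: round each lane to an integer. */
static inline __m128h round_to_int(__m128h a)
{
    return _mm_roundscale_ph(a, 0x00);
}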
- - - - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 15 - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR i := 0 to 7 - dst.fp16[i] := ConvertExpFP16(a.fp16[i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
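A sketch of the exponent extraction above (per-lane floor(log2(x)) returned as fp16), assuming the name _mm_getexp_ph:

#include <immintrin.h>

/* E.g. a lane holding 6.0 yields 2.0, since floor(log2(6)) = 2. */
static inline __m128h exponents(__m128h a)
{
    return _mm_getexp_ph(a);
}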
- - - - - - Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := ConvertExpFP16(a.fp16[i]) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := ConvertExpFP16(a.fp16[i]) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR i := 0 to 15 - dst.fp16[i] := ConvertExpFP16(a.fp16[i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := ConvertExpFP16(a.fp16[i]) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := ConvertExpFP16(a.fp16[i]) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. - [getmant_note] - FOR i := 0 TO 7 - dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
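A sketch of the mantissa normalization above, assuming the name _mm_getmant_ph and the enum constants shared with the fp32/fp64 getmant family:

#include <immintrin.h>

/* Normalize each lane's significand into [1, 2), keeping the source
   sign; "norm" and "sign" map to the [getmant_note] encodings. */
static inline __m128h mantissas(__m128h a)
{
    return _mm_getmant_ph(a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
}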
- - - - - - - - Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. - [getmant_note] - FOR i := 0 TO 7 - IF k[i] - dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. - [getmant_note] - FOR i := 0 TO 7 - IF k[i] - dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. - [getmant_note] - FOR i := 0 TO 15 - dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - - Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. - [getmant_note] - FOR i := 0 TO 15 - IF k[i] - dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. - [getmant_note] - FOR i := 0 TO 15 - IF k[i] - dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] - -DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { - m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) - tmp[15:0] := src[15:0] - tmp[15:0] - IF IsInf(tmp[15:0]) - tmp[15:0] := FP16(0.0) - FI - RETURN tmp[15:0] -} -FOR i := 0 to 7 - dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
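With imm8 = 0 the reduction above subtracts each lane's nearest integer, leaving a signed fractional residue in roughly [-0.5, 0.5]. A sketch assuming the name _mm_reduce_ph:

#include <immintrin.h>

/* tmp := src - ROUND(src) per the DEFINE above; infinities become 0. */
static inline __m128h frac_residue(__m128h a)
{
    return _mm_reduce_ph(a, 0x00);
}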
- - - - - - - Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { - m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) - tmp[15:0] := src[15:0] - tmp[15:0] - IF IsInf(tmp[15:0]) - tmp[15:0] := FP16(0.0) - FI - RETURN tmp[15:0] -} -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { - m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) - tmp[15:0] := src[15:0] - tmp[15:0] - IF IsInf(tmp[15:0]) - tmp[15:0] := FP16(0.0) - FI - RETURN tmp[15:0] -} -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] - -DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { - m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) - tmp[15:0] := src[15:0] - tmp[15:0] - IF IsInf(tmp[15:0]) - tmp[15:0] := FP16(0.0) - FI - RETURN tmp[15:0] -} -FOR i := 0 to 15 - dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { - m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) - tmp[15:0] := src[15:0] - tmp[15:0] - IF IsInf(tmp[15:0]) - tmp[15:0] := FP16(0.0) - FI - RETURN tmp[15:0] -} -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) { - m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0]) - tmp[15:0] := src[15:0] - tmp[15:0] - IF IsInf(tmp[15:0]) - tmp[15:0] := FP16(0.0) - FI - RETURN tmp[15:0] -} -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". - DEFINE ScaleFP16(src1, src2) { - denormal1 := (src1.exp == 0) and (src1.fraction != 0) - denormal2 := (src2.exp == 0) and (src2.fraction != 0) - tmp1 := src1 - tmp2 := src2 - IF MXCSR.DAZ - IF denormal1 - tmp1 := 0 - FI - IF denormal2 - tmp2 := 0 - FI - FI - RETURN tmp1 * POW(2.0, FLOOR(tmp2)) -} -FOR i := 0 to 7 - dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
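The scaling above is an ldexp-style multiply by 2^FLOOR(b). A sketch assuming the name _mm_scalef_ph:

#include <immintrin.h>

/* Each lane: a * 2^FLOOR(b), with DAZ handling as in ScaleFP16. */
static inline __m128h scale_pow2(__m128h a, __m128h b)
{
    return _mm_scalef_ph(a, b);
}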
- - - - - - - Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - DEFINE ScaleFP16(src1, src2) { - denormal1 := (src1.exp == 0) and (src1.fraction != 0) - denormal2 := (src2.exp == 0) and (src2.fraction != 0) - tmp1 := src1 - tmp2 := src2 - IF MXCSR.DAZ - IF denormal1 - tmp1 := 0 - FI - IF denormal2 - tmp2 := 0 - FI - FI - RETURN tmp1 * POW(2.0, FLOOR(tmp2)) -} -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - DEFINE ScaleFP16(src1, src2) { - denormal1 := (src1.exp == 0) and (src1.fraction != 0) - denormal2 := (src2.exp == 0) and (src2.fraction != 0) - tmp1 := src1 - tmp2 := src2 - IF MXCSR.DAZ - IF denormal1 - tmp1 := 0 - FI - IF denormal2 - tmp2 := 0 - FI - FI - RETURN tmp1 * POW(2.0, FLOOR(tmp2)) -} -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". - DEFINE ScaleFP16(src1, src2) { - denormal1 := (src1.exp == 0) and (src1.fraction != 0) - denormal2 := (src2.exp == 0) and (src2.fraction != 0) - tmp1 := src1 - tmp2 := src2 - IF MXCSR.DAZ - IF denormal1 - tmp1 := 0 - FI - IF denormal2 - tmp2 := 0 - FI - FI - RETURN tmp1 * POW(2.0, FLOOR(tmp2)) -} -FOR i := 0 to 15 - dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - - Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - DEFINE ScaleFP16(src1, src2) { - denormal1 := (src1.exp == 0) and (src1.fraction != 0) - denormal2 := (src2.exp == 0) and (src2.fraction != 0) - tmp1 := src1 - tmp2 := src2 - IF MXCSR.DAZ - IF denormal1 - tmp1 := 0 - FI - IF denormal2 - tmp2 := 0 - FI - FI - RETURN tmp1 * POW(2.0, FLOOR(tmp2)) -} -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - DEFINE ScaleFP16(src1, src2) { - denormal1 := (src1.exp == 0) and (src1.fraction != 0) - denormal2 := (src2.exp == 0) and (src2.fraction != 0) - tmp1 := src1 - tmp2 := src2 - IF MXCSR.DAZ - IF denormal1 - tmp1 := 0 - FI - IF denormal2 - tmp2 := 0 - FI - FI - RETURN tmp1 * POW(2.0, FLOOR(tmp2)) -} -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i]) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". - [fpclass_note] - FOR i := 0 to 7 - k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) -ENDFOR -k[MAX:8] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
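A sketch of the category test above, assuming the name _mm_fpclass_ph_mask and the imm8 bit assignments used elsewhere in the VFPCLASS family (0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +inf, 0x10 -inf, 0x20 denormal, 0x40 negative finite, 0x80 SNaN); treat those bit values as an assumption:

#include <immintrin.h>

/* Mask bit i is set when a.fp16[i] is any NaN (quiet or signaling). */
static inline __mmask8 nan_lanes(__m128h a)
{
    return _mm_fpclass_ph_mask(a, 0x01 | 0x80);
}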
- - - - - - Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - [fpclass_note] - FOR i := 0 to 7 - IF k1[i] - k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) - ELSE - k[i] := 0 - FI -ENDFOR -k[MAX:8] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". - [fpclass_note] - FOR i := 0 to 15 - k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) -ENDFOR -k[MAX:16] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - [fpclass_note] - FOR i := 0 to 15 - IF k1[i] - k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0]) - ELSE - k[i] := 0 - FI -ENDFOR -k[MAX:16] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Shuffle half-precision (16-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - off := idx[i+2:i] - dst.fp16[j] := idx[i+3] ? b.fp16[off] : a.fp16[off] -ENDFOR -dst[MAX:128] := 0 - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
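In the two-source shuffle above, idx lane bits [2:0] select an element and bit 3 selects "b" over "a". A sketch assuming the name _mm_permutex2var_ph:

#include <immintrin.h>

/* dst.fp16[j] = (idx bit 3 ? b : a).fp16[idx bits 2:0] per lane. */
static inline __m128h pick2(__m128h a, __m128i idx, __m128h b)
{
    return _mm_permutex2var_ph(a, idx, b);
}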
- - - - - - Shuffle half-precision (16-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - off := idx[i+3:i] - dst.fp16[j] := idx[i+4] ? b.fp16[off] : a.fp16[off] -ENDFOR -dst[MAX:256] := 0 - - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Blend packed half-precision (16-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := b.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - - Blend packed half-precision (16-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst". - -FOR j := 0 to 7 - IF k[j] - dst.fp16[j] := b.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
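A sketch of the mask-driven blend above, assuming the name _mm_mask_blend_ph:

#include <immintrin.h>

/* Set bits of k take the lane from b; clear bits take it from a. */
static inline __m128h blend8(__mmask8 k, __m128h a, __m128h b)
{
    return _mm_mask_blend_ph(k, a, b);
}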
- - - - - Shuffle half-precision (16-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 15 - i := j*16 - id := idx[i+3:i] - dst.fp16[j] := a.fp16[id] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - - Shuffle half-precision (16-bit) floating-point elements in "a" using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - id := idx[i+2:i] - dst.fp16[j] := a.fp16[id] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Miscellaneous -
- - - - Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 7 - dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 15 - dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := (1.0 / SQRT(a.fp16[i])) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - -FOR i := 0 to 7 - dst.fp16[i] := SQRT(a.fp16[i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
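Unlike the rsqrt approximation above, the square root here is an exact IEEE operation. A sketch assuming the name _mm_sqrt_ph:

#include <immintrin.h>

/* Correctly rounded per-lane fp16 square root. */
static inline __m128h sqrt8(__m128h a)
{
    return _mm_sqrt_ph(a);
}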
- - - - - - Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := SQRT(a.fp16[i]) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := SQRT(a.fp16[i]) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - -FOR i := 0 to 15 - dst.fp16[i] := SQRT(a.fp16[i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := SQRT(a.fp16[i]) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := SQRT(a.fp16[i]) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 7 - dst.fp16[i] := (1.0 / a.fp16[i]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
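The reciprocal above is approximate (relative error below 1.5*2^-12). A sketch assuming the names _mm_rcp_ph, _mm_fnmadd_ph, _mm_mul_ph and _mm_set1_ph, with one optional Newton-Raphson refinement step (often unnecessary at fp16 precision):

#include <immintrin.h>

/* x1 = x0 * (2 - a*x0); _mm_fnmadd_ph computes c - a*b. */
static inline __m128h recip_refined(__m128h a)
{
    __m128h x0  = _mm_rcp_ph(a);
    __m128h two = _mm_set1_ph((_Float16)2.0f);
    return _mm_mul_ph(x0, _mm_fnmadd_ph(a, x0, two));
}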
- - - - - - Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := (1.0 / a.fp16[i]) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 7 - IF k[i] - dst.fp16[i] := (1.0 / a.fp16[i]) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 15 - dst.fp16[i] := (1.0 / a.fp16[i]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - - Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := (1.0 / a.fp16[i]) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 1.5*2^-12. - -FOR i := 0 to 15 - IF k[i] - dst.fp16[i] := (1.0 / a.fp16[i]) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Elementary Math Functions -
- - - - Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into "dst". - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Load -
- - - - Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into "dst". - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -dst[127:0] := MEM[mem_addr+127:mem_addr] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Load -
- - - - Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[255:0] := MEM[mem_addr+255:mem_addr] -dst[MAX:256] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Load -
- - - - Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into "dst". - "mem_addr" does not need to be aligned on any particular boundary. - -dst[127:0] := MEM[mem_addr+127:mem_addr] -dst[MAX:128] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Load -
- - - - - Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from "a" into memory. - "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Store -
- - - - - Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from "a" into memory. - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -MEM[mem_addr+127:mem_addr] := a[127:0] - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Store -
- - - - - Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+255:mem_addr] := a[255:0] - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Store -
- - - - - Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from "a" into memory. - "mem_addr" does not need to be aligned on any particular boundary. - -MEM[mem_addr+127:mem_addr] := a[127:0] - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Store -
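A round trip through the unaligned load/store forms above, assuming the names _mm_loadu_ph and _mm_storeu_ph (both take untyped pointers with no alignment requirement):

#include <immintrin.h>

/* Copy eight fp16 values through a vector register. */
static inline void copy8(_Float16 dst[8], const _Float16 src[8])
{
    _mm_storeu_ph(dst, _mm_loadu_ph(src));
}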
- - - - Return vector of type __m256h with undefined elements. - AVX512_FP16 - AVX512VL -
immintrin.h
- General Support -
- - - - Return vector of type __m128h with undefined elements. - AVX512_FP16 - AVX512VL -
immintrin.h
- General Support -
- - - - Return vector of type __m256h with all elements set to zero. - -dst[MAX:0] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Set -
- - - - Return vector of type __m128h with all elements set to zero. - -dst[MAX:0] := 0 - - - AVX512_FP16 - AVX512VL -
immintrin.h
- Set -
- - - - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 TO 31 - dst.fp16[j] := a.fp16[j] + b.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
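A sketch of the full-width 512-bit add above, assuming the name _mm512_add_ph; the masked forms that follow merge from "src" or zero instead:

#include <immintrin.h>

/* 32 fp16 lanes added in one instruction. */
static inline __m512h add32(__m512h a, __m512h b)
{
    return _mm512_add_ph(a, b);
}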
- - - - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := a.fp16[j] + b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := a.fp16[j] + b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". - [round_note] - -FOR j := 0 TO 31 - dst.fp16[j] := a.fp16[j] + b.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := a.fp16[j] + b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Add packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := a.fp16[j] + b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -dst.fp16[0] := a.fp16[0] + b.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
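A sketch of the scalar add above, assuming the name _mm_add_sh; only lane 0 is computed, while lanes 1..7 pass through from "a":

#include <immintrin.h>

/* dst.fp16[0] = a.fp16[0] + b.fp16[0]; dst[127:16] = a[127:16]. */
static inline __m128h add_low(__m128h a, __m128h b)
{
    return _mm_add_sh(a, b);
}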
- - - - - - Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst.fp16[0] := a.fp16[0] + b.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := a.fp16[0] + b.fp16[0] -ELSE - dst.fp16[0] := src.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := a.fp16[0] + b.fp16[0] -ELSE - dst.fp16[0] := src.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := a.fp16[0] + b.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Add the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := a.fp16[0] + b.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 31 - dst.fp16[j] := a.fp16[j] / b.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := a.fp16[j] / b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := a.fp16[j] / b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst". - [round_note] - -FOR j := 0 to 31 - dst.fp16[j] := a.fp16[j] / b.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := a.fp16[j] / b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Divide packed half-precision (16-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := a.fp16[j] / b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -dst.fp16[0] := a.fp16[0] / b.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := a.fp16[0] / b.fp16[0] -ELSE - dst.fp16[0] := src.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := a.fp16[0] / b.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst.fp16[0] := a.fp16[0] / b.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := a.fp16[0] / b.fp16[0] -ELSE - dst.fp16[0] := src.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Divide the lower half-precision (16-bit) floating-point element in "a" by the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := a.fp16[0] / b.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 31 - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
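A scalar C model of the fused multiply-add pseudocode above, next to the intrinsic itself. "Fused" means a*b + c is rounded once per lane, which is what distinguishes it from a separate multiply and add:

#include <immintrin.h>

// Hardware form: one rounding per lane for a*b + c.
__m512h fma_ph(__m512h a, __m512h b, __m512h c) {
    return _mm512_fmadd_ph(a, b, c);
}

// Reference model of the loop in the entry above; arrays hold 32 values
// (float is used here only for readability).
void fma_model(const float *a, const float *b, const float *c, float *dst) {
    for (int j = 0; j < 32; j++)
        dst[j] = a[j] * b[j] + c[j];
}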
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - [round_note] - -FOR j := 0 to 31 - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
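For the scalar FMA forms, note how the masked entries that follow differ in their upper-lane source: the mask/maskz variants copy lanes 1..7 from "a", while the mask3 variants copy them from "c". The unmasked form, as a C sketch:

#include <immintrin.h>

// dst.fp16[0] = a.fp16[0]*b.fp16[0] + c.fp16[0]; lanes 1..7 come from a.
__m128h fma_lower(__m128h a, __m128h b, __m128h c) {
    return _mm_fmadd_sh(a, b, c);
}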
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := a.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := c.fp16[0] -FI -dst[127:16] := c[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := a.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := c.fp16[0] -FI -dst[127:16] := c[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 31 - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
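fnmadd negates only the product, so it computes c - a*b; a one-line sketch:

#include <immintrin.h>

// dst[j] = -(a[j]*b[j]) + c[j], i.e. c - a*b, still with a single rounding.
__m512h fnma_ph(__m512h a, __m512h b, __m512h c) {
    return _mm512_fnmadd_ph(a, b, c);
}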
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - [round_note] - -FOR j := 0 to 31 - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) + c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := a.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := c.fp16[0] -FI -dst[127:16] := c[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := a.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := c.fp16[0] -FI -dst[127:16] := c[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) + c.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - -FOR j := 0 to 31 - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - [round_note] - -FOR j := 0 to 31 - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := a.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := c.fp16[0] -FI -dst[127:16] := c[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := a.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := c.fp16[0] -FI -dst[127:16] := c[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - -FOR j := 0 to 31 - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
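Taken together, the four fused families in this section differ only in two signs; a compact C summary:

#include <immintrin.h>

// fmadd : +(a*b) + c    fmsub : +(a*b) - c
// fnmadd: -(a*b) + c    fnmsub: -(a*b) - c
__m512h fms_ph (__m512h a, __m512h b, __m512h c) { return _mm512_fmsub_ph(a, b, c); }
__m512h fnms_ph(__m512h a, __m512h b, __m512h c) { return _mm512_fnmsub_ph(a, b, c); }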
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - [round_note] - -FOR j := 0 to 31 - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := a.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := c.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 31 - IF k[j] - dst.fp16[j] := -(a.fp16[j] * b.fp16[j]) - c.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := a.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := c.fp16[0] -FI -dst[127:16] := c[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := a.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 7 packed elements from "c" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := c.fp16[0] -FI -dst[127:16] := c[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := -(a.fp16[0] * b.fp16[0]) - c.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
 - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". - 
-FOR j := 0 to 31
-	IF ((j & 1) == 0)
-		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-	ELSE
-		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
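A scalar model of the even/odd alternation above; this lane pattern (and the fmsubadd mirror documented below) is the building block for interleaved real/imaginary complex arithmetic (float is used only for readability):

// Even lanes: a*b - c; odd lanes: a*b + c (fmsubadd swaps the two cases).
void fmaddsub_model(const float *a, const float *b, const float *c, float *dst) {
    for (int j = 0; j < 32; j++)
        dst[j] = ((j & 1) == 0) ? a[j] * b[j] - c[j]
                                : a[j] * b[j] + c[j];
}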
 - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := a.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := c.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := 0
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". - [round_note] - 
-FOR j := 0 to 31
-	IF ((j & 1) == 0)
-		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-	ELSE
-		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - [round_note] - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := a.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - [round_note] - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := c.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := 0
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst". - 
-FOR j := 0 to 31
-	IF ((j & 1) == 0)
-		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-	ELSE
-		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := a.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := c.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := 0
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst". - [round_note] - 
-FOR j := 0 to 31
-	IF ((j & 1) == 0)
-		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-	ELSE
-		dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - [round_note] - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := a.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). - [round_note] - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := c.fp16[j]
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - 
-FOR j := 0 to 31
-	IF k[j]
-		IF ((j & 1) == 0)
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) + c.fp16[j]
-		ELSE
-			dst.fp16[j] := (a.fp16[j] * b.fp16[j]) - c.fp16[j]
-		FI
-	ELSE
-		dst.fp16[j] := 0
-	FI
-ENDFOR
-dst[MAX:512] := 0
 - - - - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
- - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 TO 31 - dst.fp16[j] := a.fp16[j] - b.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". - [round_note] - -FOR j := 0 TO 31 - dst.fp16[j] := a.fp16[j] - b.fp16[j] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := a.fp16[j] - b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := a.fp16[j] - b.fp16[j] - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := a.fp16[j] - b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Subtract packed half-precision (16-bit) floating-point elements in "b" from packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := a.fp16[j] - b.fp16[j] - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -dst.fp16[0] := a.fp16[0] - b.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst.fp16[0] := a.fp16[0] - b.fp16[0] -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := a.fp16[0] - b.fp16[0] -ELSE - dst.fp16[0] := src.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := a.fp16[0] - b.fp16[0] -ELSE - dst.fp16[0] := src.fp16[0] -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - -IF k[0] - dst.fp16[0] := a.fp16[0] - b.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Subtract the lower half-precision (16-bit) floating-point element in "b" from the lower half-precision (16-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - -IF k[0] - dst.fp16[0] := a.fp16[0] - b.fp16[0] -ELSE - dst.fp16[0] := 0 -FI -dst[127:16] := a[127:16] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR i := 0 TO 31 - dst.fp16[i] := a.fp16[i] * b.fp16[i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
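Unlike the fused entries earlier in this section, composing the plain multiply and subtract rounds twice; a sketch contrasting the two:

#include <immintrin.h>

// Two roundings: once after a*b, once after the subtract.
__m512h mulsub_unfused(__m512h a, __m512h b, __m512h c) {
    return _mm512_sub_ph(_mm512_mul_ph(a, b), c);
}

// One rounding: the fused form documented earlier in this section.
__m512h mulsub_fused(__m512h a, __m512h b, __m512h c) {
    return _mm512_fmsub_ph(a, b, c);
}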
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst". - [round_note] - -FOR i := 0 TO 31 - dst.fp16[i] := a.fp16[i] * b.fp16[i] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR i := 0 TO 31 - IF k[i] - dst.fp16[i] := a.fp16[i] * b.fp16[i] - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR i := 0 TO 31 - IF k[i] - dst.fp16[i] := a.fp16[i] * b.fp16[i] - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR i := 0 TO 31 - IF k[i] - dst.fp16[i] := a.fp16[i] * b.fp16[i] - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed half-precision (16-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR i := 0 TO 31 - IF k[i] - dst.fp16[i] := a.fp16[i] * b.fp16[i] - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
 - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - 
-dst.fp16[0] := a.fp16[0] * b.fp16[0]
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
 - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - 
-dst.fp16[0] := a.fp16[0] * b.fp16[0]
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
 - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - 
-IF k[0]
-	dst.fp16[0] := a.fp16[0] * b.fp16[0]
-ELSE
-	dst.fp16[0] := src.fp16[0]
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
 - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - 
-IF k[0]
-	dst.fp16[0] := a.fp16[0] * b.fp16[0]
-ELSE
-	dst.fp16[0] := src.fp16[0]
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
 - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - 
-IF k[0]
-	dst.fp16[0] := a.fp16[0] * b.fp16[0]
-ELSE
-	dst.fp16[0] := 0
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
 - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
 - - - - - - - Multiply the lower half-precision (16-bit) floating-point elements in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] - 
-IF k[0]
-	dst.fp16[0] := a.fp16[0] * b.fp16[0]
-ELSE
-	dst.fp16[0] := 0
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
 - - AVX512_FP16 - 
immintrin.h
- Arithmetic -
- - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 15 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
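A scalar C model of the packed complex multiply above: real and imaginary parts are interleaved in adjacent fp16 lanes, giving 16 complex products per 512-bit vector (float is used only for readability; in immintrin.h the matching 512-bit intrinsic is _mm512_fmul_pch):

// dst = a * b in the complex sense, with (re, im) pairs interleaved.
void cmul_model(const float *a, const float *b, float *dst) {
    for (int i = 0; i < 16; i++) {
        float ar = a[2*i], ai = a[2*i + 1];
        float br = b[2*i], bi = b[2*i + 1];
        dst[2*i]     = ar * br - ai * bi;   // real part
        dst[2*i + 1] = ai * br + ar * bi;   // imaginary part
    }
}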
- - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 15 - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 15 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := src.fp16[2*i+0] - dst.fp16[2*i+1] := src.fp16[2*i+1] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 15 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := src.fp16[2*i+0] - dst.fp16[2*i+1] := src.fp16[2*i+1] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]". - -FOR i := 0 to 15 - IF k[i] - dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) - dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) - ELSE - dst.fp16[2*i+0] := 0 - dst.fp16[2*i+1] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
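The masked variants above differ only in the fallback for pairs whose mask bit is clear: the writemask form merges from "src", the zeromask form writes zeros. Note that one bit of "k" governs both fp16 lanes of a complex pair. A hedged C sketch of both behaviours (the names and the zero_masked flag are illustrative, not any intrinsic's API):

#include <stdint.h>

/* Mask semantics of the two variants above: bit i of k controls
 * complex element i, i.e. both fp16 lanes of that pair. */
static void cmul_pch_mask_ref(uint16_t k, const float src[32],
                              const float a[32], const float b[32],
                              float dst[32], int zero_masked) {
    for (int i = 0; i < 16; i++) {
        if ((k >> i) & 1) {
            dst[2*i+0] = a[2*i+0]*b[2*i+0] - a[2*i+1]*b[2*i+1];
            dst[2*i+1] = a[2*i+1]*b[2*i+0] + a[2*i+0]*b[2*i+1];
        } else if (zero_masked) {          /* zeromask: lanes become 0 */
            dst[2*i+0] = dst[2*i+1] = 0.0f;
        } else {                           /* writemask: lanes copied from src */
            dst[2*i+0] = src[2*i+0];
            dst[2*i+1] = src[2*i+1];
        }
    }
}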
Multiply packed complex numbers in "a" and "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1])
    dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1])
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1])
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1])
    ELSE
        dst.fp16[2*i+0] := src.fp16[2*i+0]
        dst.fp16[2*i+1] := src.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1])
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1])
    ELSE
        dst.fp16[2*i+0] := 0
        dst.fp16[2*i+1] := 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
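The [round_note] variants take an explicit rounding control instead of using the ambient rounding mode. As a rough illustration of what selecting a rounding mode changes, here is a standard C fenv.h sketch; it is not the intrinsics' API, just the same concept at scalar float precision.

#include <fenv.h>
#include <stdio.h>

/* Compile without -ffast-math so the rounding mode is respected.
 * The volatile operands keep the division from being constant-folded. */
int main(void) {
    volatile float x = 1.0f, y = 3.0f;
    fesetround(FE_TONEAREST);
    float near = x / y;             /* rounded to nearest */
    fesetround(FE_TOWARDZERO);
    float trunc = x / y;            /* rounded toward zero */
    fesetround(FE_TONEAREST);
    printf("%.9g %.9g\n", near, trunc);  /* last bit differs */
    return 0;
}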
Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1])
dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1])
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
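The scalar entries compute only the lowest complex pair and pass the remaining six fp16 lanes of the 128-bit vector through from "a". A C model of that pass-through behaviour (float standing in for fp16, illustrative name):

/* Scalar complex multiply: only lanes 0..1 are computed; the upper
 * six fp16 lanes of the 128-bit vector are copied from a. */
static void cmul_sch_ref(const float a[8], const float b[8], float dst[8]) {
    dst[0] = a[0]*b[0] - a[1]*b[1];
    dst[1] = a[1]*b[0] + a[0]*b[1];
    for (int j = 2; j < 8; j++)
        dst[j] = a[j];              /* dst[127:32] := a[127:32] */
}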
Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1])
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1])
ELSE
    dst.fp16[0] := src.fp16[0]
    dst.fp16[1] := src.fp16[1]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1])
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1])
ELSE
    dst.fp16[0] := 0
    dst.fp16[1] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1])
dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1])
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1])
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1])
ELSE
    dst.fp16[0] := src.fp16[0]
    dst.fp16[1] := src.fp16[1]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex numbers in "a" and "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1])
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1])
ELSE
    dst.fp16[0] := 0
    dst.fp16[1] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".

FOR i := 0 to 15
    dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1])
    dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1])
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
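Conjugating "b" negates its imaginary lane, so the products become (x0*y0 + x1*y1) + i*(x1*y0 - x0*y1), which is exactly the sign pattern in the pseudocode above. A float-precision C reference (illustrative name):

/* a * conj(b) per complex pair: conj(y0 + i*y1) = y0 - i*y1. */
static void fcmul_pch_ref(const float a[32], const float b[32], float dst[32]) {
    for (int i = 0; i < 16; i++) {
        dst[2*i+0] = a[2*i+0]*b[2*i+0] + a[2*i+1]*b[2*i+1];
        dst[2*i+1] = a[2*i+1]*b[2*i+0] - a[2*i+0]*b[2*i+1];
    }
}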
Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1])
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1])
    ELSE
        dst.fp16[2*i+0] := src.fp16[2*i+0]
        dst.fp16[2*i+1] := src.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1])
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1])
    ELSE
        dst.fp16[2*i+0] := 0
        dst.fp16[2*i+1] := 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1])
    dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1])
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1])
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1])
    ELSE
        dst.fp16[2*i+0] := src.fp16[2*i+0]
        dst.fp16[2*i+1] := src.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1])
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1])
    ELSE
        dst.fp16[2*i+0] := 0
        dst.fp16[2*i+1] := 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".

dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1])
dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1])
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1])
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1])
ELSE
    dst.fp16[0] := src.fp16[0]
    dst.fp16[1] := src.fp16[1]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1])
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1])
ELSE
    dst.fp16[0] := 0
    dst.fp16[1] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
[round_note]

dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1])
dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1])
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "src" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
[round_note]

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1])
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1])
ELSE
    dst.fp16[0] := src.fp16[0]
    dst.fp16[1] := src.fp16[1]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
[round_note]

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1])
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1])
ELSE
    dst.fp16[0] := 0
    dst.fp16[1] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

FOR i := 0 to 15
    dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
    dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
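The fused form computes the same complex product and then adds the corresponding accumulator pair from "c". A float-precision C model of the semantics (the hardware operates on fp16 lanes; this sketch does not, and its name is illustrative):

/* dst = a*b + c, per complex pair, mirroring the pseudocode above. */
static void fmadd_pch_ref(const float a[32], const float b[32],
                          const float c[32], float dst[32]) {
    for (int i = 0; i < 16; i++) {
        dst[2*i+0] = a[2*i+0]*b[2*i+0] - a[2*i+1]*b[2*i+1] + c[2*i+0];
        dst[2*i+1] = a[2*i+1]*b[2*i+0] + a[2*i+0]*b[2*i+1] + c[2*i+1];
    }
}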
Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := a.fp16[2*i+0]
        dst.fp16[2*i+1] := a.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := c.fp16[2*i+0]
        dst.fp16[2*i+1] := c.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := 0
        dst.fp16[2*i+1] := 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
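The three masked variants above differ only in where a masked-off pair comes from: the first writemask form falls back to "a", the second to "c", and the zeromask form to zero. A compact C sketch of that dispatch (the enum and names are illustrative, not part of any API):

#include <stdint.h>

enum fallback { FROM_A, FROM_C, ZERO };

/* Masked complex FMA: compute a*b + c where k[i] is set, otherwise
 * take the pair from the source selected by the variant. */
static void fmadd_pch_mask_ref(uint16_t k, enum fallback f,
                               const float a[32], const float b[32],
                               const float c[32], float dst[32]) {
    for (int i = 0; i < 16; i++) {
        const float *fb = (f == FROM_A) ? a : c;
        if ((k >> i) & 1) {
            dst[2*i+0] = a[2*i+0]*b[2*i+0] - a[2*i+1]*b[2*i+1] + c[2*i+0];
            dst[2*i+1] = a[2*i+1]*b[2*i+0] + a[2*i+0]*b[2*i+1] + c[2*i+1];
        } else if (f == ZERO) {
            dst[2*i+0] = dst[2*i+1] = 0.0f;
        } else {
            dst[2*i+0] = fb[2*i+0];
            dst[2*i+1] = fb[2*i+1];
        }
    }
}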
Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
    dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := a.fp16[2*i+0]
        dst.fp16[2*i+1] := a.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := c.fp16[2*i+0]
        dst.fp16[2*i+1] := c.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply packed complex numbers in "a" and "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) - (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) + (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := 0
        dst.fp16[2*i+1] := 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0]
dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1]
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0]
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1]
ELSE
    dst.fp16[0] := a.fp16[0]
    dst.fp16[1] := a.fp16[1]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0]
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1]
ELSE
    dst.fp16[0] := c.fp16[0]
    dst.fp16[1] := c.fp16[1]
FI
dst[127:32] := c[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0]
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1]
ELSE
    dst.fp16[0] := 0
    dst.fp16[1] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0]
dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1]
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0]
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1]
ELSE
    dst.fp16[0] := a.fp16[0]
    dst.fp16[1] := a.fp16[1]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0]
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1]
ELSE
    dst.fp16[0] := c.fp16[0]
    dst.fp16[1] := c.fp16[1]
FI
dst[127:32] := c[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply the lower complex numbers in "a" and "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]".
[round_note]

IF k[0]
    dst.fp16[0] := (a.fp16[0] * b.fp16[0]) - (a.fp16[1] * b.fp16[1]) + c.fp16[0]
    dst.fp16[1] := (a.fp16[1] * b.fp16[0]) + (a.fp16[0] * b.fp16[1]) + c.fp16[1]
ELSE
    dst.fp16[0] := 0
    dst.fp16[1] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16
immintrin.h
Arithmetic
Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".

FOR i := 0 to 15
    dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
    dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
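As with the non-fused conjugate multiply, the only change from the plain complex FMA is the sign pattern contributed by conj("b"). A float-precision reference sketch (illustrative name):

/* dst = a*conj(b) + c, per complex pair. */
static void fcmadd_pch_ref(const float a[32], const float b[32],
                           const float c[32], float dst[32]) {
    for (int i = 0; i < 16; i++) {
        dst[2*i+0] = a[2*i+0]*b[2*i+0] + a[2*i+1]*b[2*i+1] + c[2*i+0];
        dst[2*i+1] = a[2*i+1]*b[2*i+0] - a[2*i+0]*b[2*i+1] + c[2*i+1];
    }
}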
Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := a.fp16[2*i+0]
        dst.fp16[2*i+1] := a.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := c.fp16[2*i+0]
        dst.fp16[2*i+1] := c.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := 0
        dst.fp16[2*i+1] := 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
    dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := a.fp16[2*i+0]
        dst.fp16[2*i+1] := a.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := c.fp16[2*i+0]
        dst.fp16[2*i+1] := c.fp16[2*i+1]
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic

Multiply packed complex numbers in "a" by the complex conjugates of packed complex numbers in "b", accumulate to the corresponding complex numbers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]".
[round_note]

FOR i := 0 to 15
    IF k[i]
        dst.fp16[2*i+0] := (a.fp16[2*i+0] * b.fp16[2*i+0]) + (a.fp16[2*i+1] * b.fp16[2*i+1]) + c.fp16[2*i+0]
        dst.fp16[2*i+1] := (a.fp16[2*i+1] * b.fp16[2*i+0]) - (a.fp16[2*i+0] * b.fp16[2*i+1]) + c.fp16[2*i+1]
    ELSE
        dst.fp16[2*i+0] := 0
        dst.fp16[2*i+1] := 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16
immintrin.h
Arithmetic
- - - - - - Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] -dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] - dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] -ELSE - dst.fp16[0] := a.fp16[0] - dst.fp16[1] := a.fp16[1] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] - dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] -ELSE - dst.fp16[0] := c.fp16[0] - dst.fp16[1] := c.fp16[1] -FI -dst[127:32] := c[127:32] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] - dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] -ELSE - dst.fp16[0] := 0 - dst.fp16[1] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst", and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - [round_note] - -dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] -dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "a" when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - [round_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] - dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] -ELSE - dst.fp16[0] := a.fp16[0] - dst.fp16[1] := a.fp16[1] -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using writemask "k" (elements are copied from "c" when mask bit 0 is not set), and copy the upper 6 packed elements from "c" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - [round_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] - dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] -ELSE - dst.fp16[0] := c.fp16[0] - dst.fp16[1] := c.fp16[1] -FI -dst[127:32] := c[127:32] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - - - Multiply the lower complex number in "a" by the complex conjugate of the lower complex number in "b", accumulate to the lower complex number in "c", and store the result in the lower elements of "dst" using zeromask "k" (elements are zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements from "a" to the upper elements of "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - [round_note] - -IF k[0] - dst.fp16[0] := (a.fp16[0] * b.fp16[0]) + (a.fp16[1] * b.fp16[1]) + c.fp16[0] - dst.fp16[1] := (a.fp16[1] * b.fp16[0]) - (a.fp16[0] * b.fp16[1]) + c.fp16[1] -ELSE - dst.fp16[0] := 0 - dst.fp16[1] := 0 -FI -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Arithmetic -
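The fcmadd entries above all compute dst = a * conj(b) + c one complex number at a time; only the masking and upper-lane handling differ. A minimal plain-C sketch of the two real-valued lanes in the pseudocode (float stands in for the fp16 elements; masking and the upper-lane copy are omitted):

#include <stdio.h>

/* Model of the conjugate multiply-accumulate pseudocode:
 * re lives in fp16[0], im in fp16[1]. */
typedef struct { float re, im; } cplx;

static cplx fcmadd_model(cplx a, cplx b, cplx c) {
    cplx dst;
    dst.re = (a.re * b.re) + (a.im * b.im) + c.re;  /* dst.fp16[0] */
    dst.im = (a.im * b.re) - (a.re * b.im) + c.im;  /* dst.fp16[1] */
    return dst;
}

int main(void) {
    cplx a = {1.0f, 2.0f}, b = {3.0f, 4.0f}, c = {0.5f, -0.5f};
    cplx r = fcmadd_model(a, b, c);
    printf("%g%+gi\n", r.re, r.im);  /* (1+2i)*(3-4i) + (0.5-0.5i) = 11.5+1.5i */
    return 0;
}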
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a". - -tmp := a -FOR i := 0 to 15 - tmp.fp16[i] := tmp.fp16[i] + a.fp16[i+16] -ENDFOR -FOR i := 0 to 7 - tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+8] -ENDFOR -FOR i := 0 to 3 - tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+4] -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := tmp.fp16[i] + tmp.fp16[i+2] -ENDFOR -dst.fp16[0] := tmp.fp16[0] + tmp.fp16[1] - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a". - -tmp := a -FOR i := 0 to 15 - tmp.fp16[i] := tmp.fp16[i] * a.fp16[i+16] -ENDFOR -FOR i := 0 to 7 - tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+8] -ENDFOR -FOR i := 0 to 3 - tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+4] -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := tmp.fp16[i] * tmp.fp16[i+2] -ENDFOR -dst.fp16[0] := tmp.fp16[0] * tmp.fp16[1] - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a". [max_float_note] - -tmp := a -FOR i := 0 to 15 - tmp.fp16[i] := (a.fp16[i] > a.fp16[i+16] ? a.fp16[i] : a.fp16[i+16]) -ENDFOR -FOR i := 0 to 7 - tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8]) -ENDFOR -FOR i := 0 to 3 - tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4]) -ENDFOR -FOR i := 0 to 1 - tmp.fp16[i] := (tmp.fp16[i] > tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2]) -ENDFOR -dst.fp16[0] := (tmp.fp16[0] > tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1]) - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - Reduce the packed half-precision (16-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a". [min_float_note] -
-tmp := a
-FOR i := 0 to 15
- tmp.fp16[i] := (a.fp16[i] < a.fp16[i+16] ? a.fp16[i] : a.fp16[i+16])
-ENDFOR
-FOR i := 0 to 7
- tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+8] ? tmp.fp16[i] : tmp.fp16[i+8])
-ENDFOR
-FOR i := 0 to 3
- tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+4] ? tmp.fp16[i] : tmp.fp16[i+4])
-ENDFOR
-FOR i := 0 to 1
- tmp.fp16[i] := (tmp.fp16[i] < tmp.fp16[i+2] ? tmp.fp16[i] : tmp.fp16[i+2])
-ENDFOR
-dst.fp16[0] := (tmp.fp16[0] < tmp.fp16[1] ? tmp.fp16[0] : tmp.fp16[1])
- - AVX512_FP16 -
immintrin.h
- Arithmetic -
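All four reduce entries share the same shape: the 32-element vector is folded in half four times, then the last pair is combined. A C model of the addition variant (float stands in for fp16; note the pairing order is fixed, so for non-associative floating-point addition the result can differ from a strict left-to-right sum):

/* Tree reduction over 32 elements, mirroring the FOR loops above.
 * The same structure applies to mul, max and min. */
static float reduce_add_model(const float a[32]) {
    float tmp[32];
    for (int i = 0; i < 32; i++) tmp[i] = a[i];
    for (int w = 16; w >= 2; w /= 2)               /* widths 16, 8, 4, 2 */
        for (int i = 0; i < w; i++) tmp[i] = tmp[i] + tmp[i + w];
    return tmp[0] + tmp[1];                        /* dst.fp16[0] */
}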
- - - - Finds the absolute value of each packed half-precision (16-bit) floating-point element in "v2", storing the results in "dst". - -FOR j := 0 to 31 - dst.fp16[j] := ABS(v2.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - AVX512_FP16 -
immintrin.h
- Arithmetic -
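Since ABS() on an IEEE value only clears the sign bit, the per-element operation has a simple bit-level reading. A sketch, viewing the binary16 value as its raw 16 bits:

#include <stdint.h>

/* |x| for an IEEE binary16 value in raw-bits form: clear bit 15. */
static uint16_t fp16_abs_bits(uint16_t h) {
    return h & 0x7FFFu;
}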
- - - - Compute the complex conjugates of complex numbers in "a", and store the results in "dst". Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) -ENDFOR -dst[MAX:512] := 0 - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - - Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512_FP16 -
immintrin.h
- Arithmetic -
- - - - - Compute the complex conjugates of complex numbers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number "complex = vec.fp16[0] + i * vec.fp16[1]", or the complex conjugate "conjugate = vec.fp16[0] - i * vec.fp16[1]". - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := a[i+31:i] XOR FP32(-0.0) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - AVX512_FP16 -
immintrin.h
- Arithmetic -
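The conjugate entries work on whole 32-bit lanes: XOR-ing a lane with FP32(-0.0) = 0x80000000 flips bit 31, which is exactly the sign bit of the fp16 imaginary part sitting in the upper half of the lane. In C:

#include <stdint.h>

/* One lane = one complex number: fp16[0] (re) in bits 15:0,
 * fp16[1] (im) in bits 31:16. Flipping bit 31 negates im only. */
static uint32_t conj_lane(uint32_t lane) {
    return lane ^ 0x80000000u;
}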
- - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 31 - k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 31 - IF k1[j] - k[j] := ( a.fp16[j] OP b.fp16[j] ) ? 1 : 0 - ELSE - k[j] := 0 - FI -ENDFOR -k[MAX:32] := 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -FOR j := 0 to 31 - k[j] := (a.fp16[j] OP b.fp16[j]) ? 1 : 0 -ENDFOR -k[MAX:32] := 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - - - - Compare packed half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - CASE (imm8[4:0]) OF
-0: OP := _CMP_EQ_OQ
-1: OP := _CMP_LT_OS
-2: OP := _CMP_LE_OS
-3: OP := _CMP_UNORD_Q
-4: OP := _CMP_NEQ_UQ
-5: OP := _CMP_NLT_US
-6: OP := _CMP_NLE_US
-7: OP := _CMP_ORD_Q
-8: OP := _CMP_EQ_UQ
-9: OP := _CMP_NGE_US
-10: OP := _CMP_NGT_US
-11: OP := _CMP_FALSE_OQ
-12: OP := _CMP_NEQ_OQ
-13: OP := _CMP_GE_OS
-14: OP := _CMP_GT_OS
-15: OP := _CMP_TRUE_UQ
-16: OP := _CMP_EQ_OS
-17: OP := _CMP_LT_OQ
-18: OP := _CMP_LE_OQ
-19: OP := _CMP_UNORD_S
-20: OP := _CMP_NEQ_US
-21: OP := _CMP_NLT_UQ
-22: OP := _CMP_NLE_UQ
-23: OP := _CMP_ORD_S
-24: OP := _CMP_EQ_US
-25: OP := _CMP_NGE_UQ
-26: OP := _CMP_NGT_UQ
-27: OP := _CMP_FALSE_OS
-28: OP := _CMP_NEQ_OS
-29: OP := _CMP_GE_OQ
-30: OP := _CMP_GT_OQ
-31: OP := _CMP_TRUE_US
-ESAC
-FOR j := 0 to 31
- IF k1[j]
- k[j] := ( a.fp16[j] OP b.fp16[j] ) ? 1 : 0
- ELSE
- k[j] := 0
- FI
-ENDFOR
-k[MAX:32] := 0
- - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". - CASE (imm8[4:0]) OF
-0: OP := _CMP_EQ_OQ
-1: OP := _CMP_LT_OS
-2: OP := _CMP_LE_OS
-3: OP := _CMP_UNORD_Q
-4: OP := _CMP_NEQ_UQ
-5: OP := _CMP_NLT_US
-6: OP := _CMP_NLE_US
-7: OP := _CMP_ORD_Q
-8: OP := _CMP_EQ_UQ
-9: OP := _CMP_NGE_US
-10: OP := _CMP_NGT_US
-11: OP := _CMP_FALSE_OQ
-12: OP := _CMP_NEQ_OQ
-13: OP := _CMP_GE_OS
-14: OP := _CMP_GT_OS
-15: OP := _CMP_TRUE_UQ
-16: OP := _CMP_EQ_OS
-17: OP := _CMP_LT_OQ
-18: OP := _CMP_LE_OQ
-19: OP := _CMP_UNORD_S
-20: OP := _CMP_NEQ_US
-21: OP := _CMP_NLT_UQ
-22: OP := _CMP_NLE_UQ
-23: OP := _CMP_ORD_S
-24: OP := _CMP_EQ_US
-25: OP := _CMP_NGE_UQ
-26: OP := _CMP_NGT_UQ
-27: OP := _CMP_FALSE_OS
-28: OP := _CMP_NEQ_OS
-29: OP := _CMP_GE_OQ
-30: OP := _CMP_GT_OQ
-31: OP := _CMP_TRUE_US
-ESAC
-k[0] := (a.fp16[0] OP b.fp16[0]) ? 1 : 0
-k[MAX:1] := 0
- - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". [sae_note] - CASE (imm8[4:0]) OF
-0: OP := _CMP_EQ_OQ
-1: OP := _CMP_LT_OS
-2: OP := _CMP_LE_OS
-3: OP := _CMP_UNORD_Q
-4: OP := _CMP_NEQ_UQ
-5: OP := _CMP_NLT_US
-6: OP := _CMP_NLE_US
-7: OP := _CMP_ORD_Q
-8: OP := _CMP_EQ_UQ
-9: OP := _CMP_NGE_US
-10: OP := _CMP_NGT_US
-11: OP := _CMP_FALSE_OQ
-12: OP := _CMP_NEQ_OQ
-13: OP := _CMP_GE_OS
-14: OP := _CMP_GT_OS
-15: OP := _CMP_TRUE_UQ
-16: OP := _CMP_EQ_OS
-17: OP := _CMP_LT_OQ
-18: OP := _CMP_LE_OQ
-19: OP := _CMP_UNORD_S
-20: OP := _CMP_NEQ_US
-21: OP := _CMP_NLT_UQ
-22: OP := _CMP_NLE_UQ
-23: OP := _CMP_ORD_S
-24: OP := _CMP_EQ_US
-25: OP := _CMP_NGE_UQ
-26: OP := _CMP_NGT_UQ
-27: OP := _CMP_FALSE_OS
-28: OP := _CMP_NEQ_OS
-29: OP := _CMP_GE_OQ
-30: OP := _CMP_GT_OQ
-31: OP := _CMP_TRUE_US
-ESAC
-k[0] := (a.fp16[0] OP b.fp16[0]) ? 1 : 0
-k[MAX:1] := 0
- - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). - CASE (imm8[4:0]) OF
-0: OP := _CMP_EQ_OQ
-1: OP := _CMP_LT_OS
-2: OP := _CMP_LE_OS
-3: OP := _CMP_UNORD_Q
-4: OP := _CMP_NEQ_UQ
-5: OP := _CMP_NLT_US
-6: OP := _CMP_NLE_US
-7: OP := _CMP_ORD_Q
-8: OP := _CMP_EQ_UQ
-9: OP := _CMP_NGE_US
-10: OP := _CMP_NGT_US
-11: OP := _CMP_FALSE_OQ
-12: OP := _CMP_NEQ_OQ
-13: OP := _CMP_GE_OS
-14: OP := _CMP_GT_OS
-15: OP := _CMP_TRUE_UQ
-16: OP := _CMP_EQ_OS
-17: OP := _CMP_LT_OQ
-18: OP := _CMP_LE_OQ
-19: OP := _CMP_UNORD_S
-20: OP := _CMP_NEQ_US
-21: OP := _CMP_NLT_UQ
-22: OP := _CMP_NLE_UQ
-23: OP := _CMP_ORD_S
-24: OP := _CMP_EQ_US
-25: OP := _CMP_NGE_UQ
-26: OP := _CMP_NGT_UQ
-27: OP := _CMP_FALSE_OS
-28: OP := _CMP_NEQ_OS
-29: OP := _CMP_GE_OQ
-30: OP := _CMP_GT_OQ
-31: OP := _CMP_TRUE_US
-ESAC
-IF k1[0]
- k[0] := ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0
-ELSE
- k[0] := 0
-FI
-k[MAX:1] := 0
- - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note] - CASE (imm8[4:0]) OF
-0: OP := _CMP_EQ_OQ
-1: OP := _CMP_LT_OS
-2: OP := _CMP_LE_OS
-3: OP := _CMP_UNORD_Q
-4: OP := _CMP_NEQ_UQ
-5: OP := _CMP_NLT_US
-6: OP := _CMP_NLE_US
-7: OP := _CMP_ORD_Q
-8: OP := _CMP_EQ_UQ
-9: OP := _CMP_NGE_US
-10: OP := _CMP_NGT_US
-11: OP := _CMP_FALSE_OQ
-12: OP := _CMP_NEQ_OQ
-13: OP := _CMP_GE_OS
-14: OP := _CMP_GT_OS
-15: OP := _CMP_TRUE_UQ
-16: OP := _CMP_EQ_OS
-17: OP := _CMP_LT_OQ
-18: OP := _CMP_LE_OQ
-19: OP := _CMP_UNORD_S
-20: OP := _CMP_NEQ_US
-21: OP := _CMP_NLT_UQ
-22: OP := _CMP_NLE_UQ
-23: OP := _CMP_ORD_S
-24: OP := _CMP_EQ_US
-25: OP := _CMP_NGE_UQ
-26: OP := _CMP_NGT_UQ
-27: OP := _CMP_FALSE_OS
-28: OP := _CMP_NEQ_OS
-29: OP := _CMP_GE_OQ
-30: OP := _CMP_GT_OQ
-31: OP := _CMP_TRUE_US
-ESAC
-IF k1[0]
- k[0] := ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0
-ELSE
- k[0] := 0
-FI
-k[MAX:1] := 0
- - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -RETURN ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note] - CASE (imm8[4:0]) OF -0: OP := _CMP_EQ_OQ -1: OP := _CMP_LT_OS -2: OP := _CMP_LE_OS -3: OP := _CMP_UNORD_Q -4: OP := _CMP_NEQ_UQ -5: OP := _CMP_NLT_US -6: OP := _CMP_NLE_US -7: OP := _CMP_ORD_Q -8: OP := _CMP_EQ_UQ -9: OP := _CMP_NGE_US -10: OP := _CMP_NGT_US -11: OP := _CMP_FALSE_OQ -12: OP := _CMP_NEQ_OQ -13: OP := _CMP_GE_OS -14: OP := _CMP_GT_OS -15: OP := _CMP_TRUE_UQ -16: OP := _CMP_EQ_OS -17: OP := _CMP_LT_OQ -18: OP := _CMP_LE_OQ -19: OP := _CMP_UNORD_S -20: OP := _CMP_NEQ_US -21: OP := _CMP_NLT_UQ -22: OP := _CMP_NLE_UQ -23: OP := _CMP_ORD_S -24: OP := _CMP_EQ_US -25: OP := _CMP_NGE_UQ -26: OP := _CMP_NGT_UQ -27: OP := _CMP_FALSE_OS -28: OP := _CMP_NEQ_OS -29: OP := _CMP_GE_OQ -30: OP := _CMP_GT_OQ -31: OP := _CMP_TRUE_US -ESAC -RETURN ( a.fp16[0] OP b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
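In the predicate table, imm8[4:0] selects one of 32 predicates, and the suffix encodes the NaN rule: ordered predicates (..._O*) compare false when either input is NaN, unordered ones (..._U*) compare true, and Q vs. S selects quiet vs. signaling behavior on QNaN inputs. Two table entries as scalar C models (float stands in for fp16; exception signaling is not modeled):

#include <math.h>
#include <stdbool.h>

static bool cmp_lt_os(float a, float b)  /* _CMP_LT_OS: ordered less-than */
{ return !isnan(a) && !isnan(b) && a < b; }

static bool cmp_neq_uq(float a, float b) /* _CMP_NEQ_UQ: unordered not-equal */
{ return isnan(a) || isnan(b) || a != b; }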
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for equality, and return the boolean result (0 or 1). - RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] == b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than, and return the boolean result (0 or 1). - RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] < b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). - RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] <= b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than, and return the boolean result (0 or 1). - RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] > b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). - RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] >= b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for not-equal, and return the boolean result (0 or 1). - RETURN ( a.fp16[0] ==NaN OR b.fp16[0] ==NaN OR a.fp16[0] != b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] == b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] < b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] <= b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] > b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a.fp16[0] !=NaN AND b.fp16[0] !=NaN AND a.fp16[0] >= b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
- - - - - Compare the lower half-precision (16-bit) floating-point elements in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a.fp16[0] ==NaN OR b.fp16[0] ==NaN OR a.fp16[0] != b.fp16[0] ) ? 1 : 0 - - - AVX512_FP16 -
immintrin.h
- Compare -
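The comi/ucomi pairs above return the same boolean; they differ only in exception behavior, the ucomi forms staying quiet on QNaN inputs. The RETURN lines translate directly to C (float stands in for fp16; the exception-flag difference is not modeled):

#include <math.h>

static int comieq_model(float a, float b)  /* equal only if neither is NaN */
{ return !isnan(a) && !isnan(b) && a == b; }

static int comineq_model(float a, float b) /* NaN on either side reads as not-equal */
{ return isnan(a) || isnan(b) || a != b; }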
- - - - Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 TO 31 - dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 TO 31 - dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed signed 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 31 - IF k[j] - dst.fp16[j] := Convert_Int16_To_FP16(a.word[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
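The masked variants throughout this convert section follow one of two patterns: merge-masking keeps the element from "src" where the mask bit is clear, while zero-masking writes zero. A C model of both (illustrative names, not the intrinsics' API; cvt() stands in for the per-element Convert_* step, float/short for the fp16/word elements):

static void cvt_mask_model(float *dst, const float *src, const short *a,
                           unsigned k, int n, float (*cvt)(short)) {
    for (int j = 0; j < n; j++)                     /* merge-masking */
        dst[j] = ((k >> j) & 1) ? cvt(a[j]) : src[j];
}

static void cvt_maskz_model(float *dst, const short *a,
                            unsigned k, int n, float (*cvt)(short)) {
    for (int j = 0; j < n; j++)                     /* zero-masking */
        dst[j] = ((k >> j) & 1) ? cvt(a[j]) : 0.0f;
}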
- - - - Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". -
-FOR j := 0 TO 31
- dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j])
-ENDFOR
-dst[MAX:512] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - [round_note] -
-FOR j := 0 TO 31
- dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j])
-ENDFOR
-dst[MAX:512] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). -
-FOR j := 0 TO 31
- IF k[j]
- dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j])
- ELSE
- dst.fp16[j] := src.fp16[j]
- FI
-ENDFOR
-dst[MAX:512] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] -
-FOR j := 0 TO 31
- IF k[j]
- dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j])
- ELSE
- dst.fp16[j] := src.fp16[j]
- FI
-ENDFOR
-dst[MAX:512] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). -
-FOR j := 0 TO 31
- IF k[j]
- dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j])
- ELSE
- dst.fp16[j] := 0
- FI
-ENDFOR
-dst[MAX:512] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 16-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] -
-FOR j := 0 TO 31
- IF k[j]
- dst.fp16[j] := Convert_UInt16_To_FP16(a.word[j])
- ELSE
- dst.fp16[j] := 0
- FI
-ENDFOR
-dst[MAX:512] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 TO 15 - dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 TO 15 - dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 15 - IF k[j] - dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed signed 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 15 - IF k[j] - dst.fp16[j] := Convert_Int32_To_FP16(a.dword[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". -
-FOR j := 0 TO 15
- dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
-ENDFOR
-dst[MAX:256] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - [round_note] -
-FOR j := 0 TO 15
- dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
-ENDFOR
-dst[MAX:256] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). -
-FOR j := 0 TO 15
- IF k[j]
- dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
- ELSE
- dst.fp16[j] := src.fp16[j]
- FI
-ENDFOR
-dst[MAX:256] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] -
-FOR j := 0 TO 15
- IF k[j]
- dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
- ELSE
- dst.fp16[j] := src.fp16[j]
- FI
-ENDFOR
-dst[MAX:256] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). -
-FOR j := 0 TO 15
- IF k[j]
- dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
- ELSE
- dst.fp16[j] := 0
- FI
-ENDFOR
-dst[MAX:256] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 32-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] -
-FOR j := 0 TO 15
- IF k[j]
- dst.fp16[j] := Convert_UInt32_To_FP16(a.dword[j])
- ELSE
- dst.fp16[j] := 0
- FI
-ENDFOR
-dst[MAX:256] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 TO 7 - dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 TO 7 - dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed signed 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := Convert_Int64_To_FP16(a.qword[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". -
-FOR j := 0 TO 7
- dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
-ENDFOR
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - [round_note] -
-FOR j := 0 TO 7
- dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
-ENDFOR
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). -
-FOR j := 0 TO 7
- IF k[j]
- dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
- ELSE
- dst.fp16[j] := src.fp16[j]
- FI
-ENDFOR
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] -
-FOR j := 0 TO 7
- IF k[j]
- dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
- ELSE
- dst.fp16[j] := src.fp16[j]
- FI
-ENDFOR
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). -
-FOR j := 0 TO 7
- IF k[j]
- dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
- ELSE
- dst.fp16[j] := 0
- FI
-ENDFOR
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed unsigned 64-bit integers in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] -
-FOR j := 0 TO 7
- IF k[j]
- dst.fp16[j] := Convert_UInt64_To_FP16(a.qword[j])
- ELSE
- dst.fp16[j] := 0
- FI
-ENDFOR
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 TO 7 - dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 TO 7 - dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 7 - IF k[j] - dst.fp16[j] := Convert_FP64_To_FP16(a.fp64[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". -
-dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] -
-dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". -
-IF k[0]
- dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
-ELSE
- dst.fp16[0] := src.fp16[0]
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] -
-IF k[0]
- dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
-ELSE
- dst.fp16[0] := src.fp16[0]
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". -
-IF k[0]
- dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
-ELSE
- dst.fp16[0] := 0
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] -
-IF k[0]
- dst.fp16[0] := Convert_FP64_To_FP16(b.fp64[0])
-ELSE
- dst.fp16[0] := 0
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
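As a sketch of the scalar narrowing itself, assuming a compiler with the C23 _Float16 type (e.g. recent GCC or Clang targeting x86-64): the cast performs one correctly rounded double-to-binary16 conversion, modeling Convert_FP64_To_FP16, and fesetround() stands in for the [round_note] embedded rounding control (modulo the usual FENV_ACCESS caveats). The upper-lane copy from "a" is omitted.

#include <fenv.h>
#include <stdio.h>

int main(void) {
    fesetround(FE_TOWARDZERO);            /* models a round-toward-zero [round_note] */
    volatile double x = 3.14159265358979;
    _Float16 h = (_Float16)x;             /* double -> binary16 narrowing */
    printf("%f\n", (double)h);            /* 3.140625: pi narrowed to binary16 */
    return 0;
}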
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 15 - dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - [round_note] - -FOR j := 0 to 15 - dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) - ELSE - dst.fp16[j] := src.fp16[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 to 15 - IF k[j] - dst.fp16[j] := Convert_FP32_To_FP16(a.fp32[j]) - ELSE - dst.fp16[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". -
-dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] -
-dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". -
-IF k[0]
- dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
-ELSE
- dst.fp16[0] := src.fp16[0]
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] -
-IF k[0]
- dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
-ELSE
- dst.fp16[0] := src.fp16[0]
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". -
-IF k[0]
- dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
-ELSE
- dst.fp16[0] := 0
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". - [round_note] -
-IF k[0]
- dst.fp16[0] := Convert_FP32_To_FP16(b.fp32[0])
-ELSE
- dst.fp16[0] := 0
-FI
-dst[127:16] := a[127:16]
-dst[MAX:128] := 0
- - AVX512_FP16 -
immintrin.h
- Convert -
- - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 TO 15 - dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 TO 15 - dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_Int32(a.fp16[j]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 TO 15 - dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". [sae_note] - -FOR j := 0 TO 15 - dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_Int32_Truncate(a.fp16[j]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
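The difference between the cvt and cvtt families above is only the rounding step: cvt honors the current or embedded rounding mode, while the cvtt (Truncate) forms always chop toward zero. In C terms (float stands in for the fp16 input; the out-of-range and NaN cases are not modeled):

#include <math.h>

static int cvt_model(float x)  /* Convert_FP16_To_Int32: current rounding mode */
{ return (int)lrintf(x); }

static int cvtt_model(float x) /* Convert_FP16_To_Int32_Truncate: chop toward zero */
{ return (int)x; }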
- - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". - -FOR j := 0 TO 15 - dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 TO 15 - dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_UInt32(a.fp16[j]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 TO 15 - dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst". [sae_note] - -FOR j := 0 TO 15 - dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note] - -FOR j := 0 TO 15 - IF k[j] - dst.dword[j] := Convert_FP16_To_UInt32_Truncate(a.fp16[j]) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 TO 7 - dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". - [round_note] - -FOR j := 0 TO 7 - dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 TO 7 - IF k[j] - dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) - ELSE - dst.qword[j] := src.qword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 7 - IF k[j] - dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) - ELSE - dst.qword[j] := src.qword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 TO 7 - IF k[j] - dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) - ELSE - dst.qword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - [round_note] - -FOR j := 0 TO 7 - IF k[j] - dst.qword[j] := Convert_FP16_To_Int64(a.fp16[j]) - ELSE - dst.qword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Convert -
Convert packed half-precision (16-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". Also available with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); each of the three forms also exists with [sae_note].

FOR j := 0 TO 7
    IF k[j]   // the unmasked forms convert every element
        dst.qword[j] := Convert_FP16_To_Int64_Truncate(a.fp16[j])
    ELSE
        dst.qword[j] := src.qword[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Convert
Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". Also available with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); each of the three forms also exists with [round_note].

FOR j := 0 TO 7
    IF k[j]   // the unmasked forms convert every element
        dst.qword[j] := Convert_FP16_To_UInt64(a.fp16[j])
    ELSE
        dst.qword[j] := src.qword[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Convert
Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". Also available with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); each of the three forms also exists with [sae_note].

FOR j := 0 TO 7
    IF k[j]   // the unmasked forms convert every element
        dst.qword[j] := Convert_FP16_To_UInt64_Truncate(a.fp16[j])
    ELSE
        dst.qword[j] := src.qword[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Convert
Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". Also available with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); each of the three forms also exists with [round_note].

FOR j := 0 TO 31
    IF k[j]   // the unmasked forms convert every element
        dst.word[j] := Convert_FP16_To_Int16(a.fp16[j])
    ELSE
        dst.word[j] := src.word[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Convert
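A sketch of the plain 16-bit form, assuming it maps to _mm512_cvtph_epi16 (the name is not given in the entry itself):

    #include <immintrin.h>

    /* Round all 32 FP16 lanes to signed 16-bit integers in one instruction. */
    __m512i fp16_to_i16(__m512h a) {
        return _mm512_cvtph_epi16(a);
    }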
Convert packed half-precision (16-bit) floating-point elements in "a" to packed 16-bit integers with truncation, and store the results in "dst". Also available with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); each of the three forms also exists with [sae_note].

FOR j := 0 TO 31
    IF k[j]   // the unmasked forms convert every element
        dst.word[j] := Convert_FP16_To_Int16_Truncate(a.fp16[j])
    ELSE
        dst.word[j] := src.word[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Convert
Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers, and store the results in "dst". Also available with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); each of the three forms also exists with [sae_note].

FOR j := 0 TO 31
    IF k[j]   // the unmasked forms convert every element
        dst.word[j] := Convert_FP16_To_UInt16(a.fp16[j])
    ELSE
        dst.word[j] := src.word[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Convert
Convert packed half-precision (16-bit) floating-point elements in "a" to packed unsigned 16-bit integers with truncation, and store the results in "dst". Also available with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); each of the three forms also exists with [sae_note].

FOR j := 0 TO 31
    IF k[j]   // the unmasked forms convert every element
        dst.word[j] := Convert_FP16_To_UInt16_Truncate(a.fp16[j])
    ELSE
        dst.word[j] := src.word[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Convert
Convert packed half-precision (16-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". Also available with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); each of the three forms also exists with [sae_note].

FOR j := 0 to 7
    IF k[j]   // the unmasked forms convert every element
        dst.fp64[j] := Convert_FP16_To_FP64(a.fp16[j])
    ELSE
        dst.fp64[j] := src.fp64[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Convert
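A sketch of the writemask form, assuming the plain and masked entries map to _mm512_cvtph_pd and _mm512_mask_cvtph_pd respectively (names are elided above):

    #include <immintrin.h>

    /* Widen 8 FP16 elements to double precision; lanes whose mask bit is
       clear keep the corresponding lane of "src". */
    __m512d widen_masked(__m512d src, __mmask8 k, __m128h a) {
        return _mm512_mask_cvtph_pd(src, k, a);
    }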
Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". Also available with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); each of the three forms also exists with [sae_note].

FOR j := 0 to 15
    IF k[j]   // the unmasked forms convert every element
        dst.fp32[j] := Convert_FP16_To_FP32(a.fp16[j])
    ELSE
        dst.fp32[j] := src.fp32[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Convert
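A sketch of the plain widening form. I am assuming it corresponds to _mm512_cvtxph_ps (the VCVTPH2PSX encoding); treat the name as a best guess rather than confirmed by the entry:

    #include <immintrin.h>

    /* Widen 16 FP16 elements to single precision. */
    __m512 widen_ps(__m256h a) {
        return _mm512_cvtxph_ps(a);
    }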
Convert the lower half-precision (16-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". Also available with writemask "k" (the element is copied from "src" when mask bit 0 is not set) and with zeromask "k" (the element is zeroed out when mask bit 0 is not set); each of the three forms also exists with [sae_note].

IF k[0]   // the unmasked forms always convert
    dst.fp64[0] := Convert_FP16_To_FP64(b.fp16[0])
ELSE
    dst.fp64[0] := src.fp64[0]   // zeromask forms: 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

AVX512_FP16, immintrin.h, Convert
Convert the lower half-precision (16-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". Also available with writemask "k" (the element is copied from "src" when mask bit 0 is not set) and with zeromask "k" (the element is zeroed out when mask bit 0 is not set); each of the three forms also exists with [sae_note].

IF k[0]   // the unmasked forms always convert
    dst.fp32[0] := Convert_FP16_To_FP32(b.fp16[0])
ELSE
    dst.fp32[0] := src.fp32[0]   // zeromask forms: 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

AVX512_FP16, immintrin.h, Convert
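A sketch of the plain scalar form, assuming it maps to _mm_cvtsh_ss (name elided above):

    #include <immintrin.h>

    /* Lane 0 of the result is the widened low FP16 element of "b";
       lanes 1..3 pass through from "a". */
    __m128 widen_low(__m128 a, __m128h b) {
        return _mm_cvtsh_ss(a, b);
    }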
Convert the lower half-precision (16-bit) floating-point element in "a" to an integer, and store the result in "dst". Eight forms are provided; each also exists with the note shown:

    dst.dword := Convert_FP16_To_Int32(a.fp16[0])            // 32-bit integer; also with [round_note]
    dst.qword := Convert_FP16_To_Int64(a.fp16[0])            // 64-bit integer; also with [round_note]
    dst.dword := Convert_FP16_To_Int32_Truncate(a.fp16[0])   // 32-bit integer with truncation; also with [sae_note]
    dst.qword := Convert_FP16_To_Int64_Truncate(a.fp16[0])   // 64-bit integer with truncation; also with [sae_note]
    dst.dword := Convert_FP16_To_UInt32(a.fp16[0])           // unsigned 32-bit integer; also with [sae_note]
    dst.qword := Convert_FP16_To_UInt64(a.fp16[0])           // unsigned 64-bit integer; also with [round_note]
    dst.dword := Convert_FP16_To_UInt32_Truncate(a.fp16[0])  // unsigned 32-bit integer with truncation; also with [sae_note]
    dst.qword := Convert_FP16_To_UInt64_Truncate(a.fp16[0])  // unsigned 64-bit integer with truncation; also with [sae_note]

AVX512_FP16, immintrin.h, Convert
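A sketch contrasting the rounding and truncating scalar forms, assuming they map to _mm_cvtsh_i32 and _mm_cvttsh_i32 (names elided above):

    #include <immintrin.h>

    /* For an input of 9.7: truncation yields 9, while the default
       round-to-nearest conversion yields 10. */
    int nearest_and_trunc(__m128h a, int *trunc_out) {
        *trunc_out = _mm_cvttsh_i32(a);
        return _mm_cvtsh_i32(a);
    }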
Convert the signed 32-bit integer "b" to a half-precision (16-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". Unsigned 32-bit, signed 64-bit, and unsigned 64-bit forms are also provided, and every form exists with and without [round_note].

dst.fp16[0] := Convert_Int32_To_FP16(b[31:0])   // 64-bit forms: Convert_Int64_To_FP16(b[63:0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0

AVX512_FP16, immintrin.h, Convert
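A sketch of the signed 32-bit form, assuming it maps to _mm_cvti32_sh (name elided above):

    #include <immintrin.h>

    /* Replace lane 0 of "a" with the FP16 conversion of "v";
       the upper 7 FP16 lanes of "a" pass through unchanged. */
    __m128h set_low_from_int(__m128h a, int v) {
        return _mm_cvti32_sh(a, v);
    }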
Copy 16-bit integer "a" to the lower elements of "dst", and zero the upper elements of "dst".

dst.fp16[0] := a.fp16[0]
dst[MAX:16] := 0

Copy the lower 16-bit integer in "a" to "dst".

dst.fp16[0] := a.fp16[0]
dst[MAX:16] := 0

Copy the lower half-precision (16-bit) floating-point element of "a" to "dst".

dst[15:0] := a.fp16[0]

AVX512_FP16, immintrin.h, Convert
Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] Also available with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set) and with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set); each of the three forms also exists with [sae_note].

FOR j := 0 to 31
    IF k[j]   // the unmasked forms compare every element
        dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j])
    ELSE
        dst.fp16[j] := src.fp16[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Special Math Functions

Compare packed half-precision (16-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] The same writemask, zeromask, and [sae_note] variants are provided.

FOR j := 0 to 31
    IF k[j]   // the unmasked forms compare every element
        dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j])
    ELSE
        dst.fp16[j] := src.fp16[j]   // zeromask forms: 0
    FI
ENDFOR
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Special Math Functions
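A sketch combining the two, assuming the plain forms map to _mm512_max_ph and _mm512_min_ph. Note that per [max_float_note]/[min_float_note] these comparisons are not fully IEEE-754 compliant for NaN and signed-zero inputs:

    #include <immintrin.h>

    /* Clamp every FP16 lane of x into [lo, hi]. */
    __m512h clamp_ph(__m512h x, __m512h lo, __m512h hi) {
        return _mm512_min_ph(_mm512_max_ph(x, lo), hi);
    }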
Extract the reduced argument of the lower half-precision (16-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] Also available with writemask "k" (the element is copied from "src" when mask bit 0 is not set) and with zeromask "k" (the element is zeroed out when mask bit 0 is not set); each of the three forms also exists with [sae_note].

DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) {
    m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved
    tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0])
    tmp[15:0] := src[15:0] - tmp[15:0]
    IF IsInf(tmp[15:0])
        tmp[15:0] := FP16(0.0)
    FI
    RETURN tmp[15:0]
}
IF k[0]   // the unmasked forms always compute
    dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8)
ELSE
    dst.fp16[0] := src.fp16[0]   // zeromask forms: 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

AVX512_FP16, immintrin.h, Special Math Functions
Load a half-precision (16-bit) floating-point element from memory into the lower element of "dst", and zero the upper elements. Also available with writemask "k" (the element is copied from "src" when mask bit 0 is not set) and with zeromask "k" (the element is zeroed out when mask bit 0 is not set); both masked forms still zero the upper elements of "dst".

IF k[0]   // the unmasked form always loads
    dst.fp16[0] := MEM[mem_addr].fp16[0]
ELSE
    dst.fp16[0] := src.fp16[0]   // zeromask form: 0
FI
dst[MAX:16] := 0

Load 512 bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into "dst". In the aligned form, "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated; the unaligned form has no alignment requirement.

dst[511:0] := MEM[mem_addr+511:mem_addr]
dst[MAX:512] := 0

AVX512_FP16, immintrin.h, Load
Store the lower half-precision (16-bit) floating-point element from "a" into memory. Also available with writemask "k" (the element is stored only when mask bit 0 is set).

IF k[0]   // the unmasked form always stores
    MEM[mem_addr].fp16[0] := a.fp16[0]
FI

Store 512 bits (composed of 32 packed half-precision (16-bit) floating-point elements) from "a" into memory. In the aligned form, "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated; the unaligned form has no alignment requirement.

MEM[mem_addr+511:mem_addr] := a[511:0]

AVX512_FP16, immintrin.h, Store
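A round-trip sketch of the unaligned forms, assuming they map to _mm512_loadu_ph and _mm512_storeu_ph:

    #include <immintrin.h>

    /* Copy 32 FP16 values through a vector register;
       neither pointer needs 64-byte alignment. */
    void copy32_fp16(_Float16 dst[32], const _Float16 src[32]) {
        __m512h v = _mm512_loadu_ph(src);
        _mm512_storeu_ph(dst, v);
    }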
Move the lower half-precision (16-bit) floating-point element from "b" to the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". Also available with writemask "k" (the element is copied from "src" when mask bit 0 is not set) and with zeromask "k" (the element is zeroed out when mask bit 0 is not set).

IF k[0]   // the unmasked form always moves
    dst.fp16[0] := b.fp16[0]
ELSE
    dst.fp16[0] := src.fp16[0]   // zeromask form: 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

AVX512_FP16, immintrin.h, Move
- - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 31 - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) -ENDFOR -dest[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Miscellaneous -
- - - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 31 - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) -ENDFOR -dest[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Miscellaneous -
- - - - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 31 - IF k[i] - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dest[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Miscellaneous -
- - - - - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 31 - IF k[i] - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := src.fp16[i] - FI -ENDFOR -dest[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Miscellaneous -
- - - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 31 - IF k[i] - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dest[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Miscellaneous -
- - - - - - - Round packed half-precision (16-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note] - -DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) { - m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved - tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0]) - RETURN tmp.fp16 -} -FOR i := 0 to 31 - IF k[i] - dst.fp16[i] := RoundScaleFP16(a.fp16[i], imm8) - ELSE - dst.fp16[i] := 0 - FI -ENDFOR -dest[MAX:512] := 0 - - - AVX512_FP16 -
immintrin.h
- Miscellaneous -
Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". [round_imm_note] Six variants are provided: with or without [sae_note], each either unmasked, with writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or with zeromask "k" (the lower element is zeroed out when mask bit 0 is not set).

Operation (unmasked form; the masked forms guard the dst.fp16[0] assignment with IF k[0], substituting src.fp16[0] or 0 when the bit is clear):

DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) {
    m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved
    tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0])
    RETURN tmp.fp16
}
dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8)
dst[127:16] := a[127:16]
dst[MAX:128] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
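A minimal C sketch of the fixed-point-style rounding this performs; it assumes this entry corresponds to the _mm_roundscale_sh intrinsic and a compiler with AVX512-FP16 support (e.g. -mavx512fp16):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128h a = _mm_set1_ph((_Float16)1.0f);     /* supplies the upper 7 lanes */
    __m128h b = _mm_set_sh((_Float16)2.71875f);  /* value to round in lane 0 */
    /* imm8[7:4] = M = 2 fraction bits kept, imm8[3:0] = 0 = round to nearest */
    __m128h r = _mm_roundscale_sh(a, b, (2 << 4) | 0);
    /* 2.71875 rounded to a multiple of 2^-2 is 2.75 */
    printf("%f\n", (double)_mm_cvtsh_h(r));
    return 0;
}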
Convert the exponent of each packed half-precision (16-bit) floating-point element in "a" to a half-precision (16-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. Six variants are provided: with or without [sae_note], each either unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), or with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation (unmasked form; the masked forms guard each element assignment with IF k[i], substituting src.fp16[i] or 0 when the bit is clear):

FOR i := 0 to 31
    dst.fp16[i] := ConvertExpFP16(a.fp16[i])
ENDFOR
dst[MAX:512] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
Convert the exponent of the lower half-precision (16-bit) floating-point element in "b" to a half-precision (16-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element. Six variants are provided: with or without [sae_note], each either unmasked, with writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or with zeromask "k" (the lower element is zeroed out when mask bit 0 is not set).

Operation (unmasked form; the masked forms guard the dst.fp16[0] assignment with IF k[0]):

dst.fp16[0] := ConvertExpFP16(b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
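A short C sketch of the packed form; it assumes this pair of entries corresponds to _mm512_getexp_ph / _mm_getexp_sh and a compiler with AVX512-FP16 support:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512h x = _mm512_set1_ph((_Float16)6.0f);
    __m512h e = _mm512_getexp_ph(x);            /* floor(log2(6.0)) = 2.0 per lane */
    __m128h lo = _mm512_castph512_ph128(e);
    printf("%f\n", (double)_mm_cvtsh_h(lo));    /* prints 2.000000 */
    return 0;
}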
Normalize the mantissas of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. [getmant_note] Six variants are provided: with or without [sae_note], each either unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), or with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation (unmasked form; the masked forms guard each element assignment with IF k[i]):

FOR i := 0 TO 31
    dst.fp16[i] := GetNormalizedMantissaFP16(a.fp16[i], norm, sign)
ENDFOR
dst[MAX:512] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
Normalize the mantissas of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "norm" and the sign depends on "sign" and the source sign. [getmant_note] Six variants are provided: with or without [sae_note], each either unmasked, with writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or with zeromask "k" (the lower element is zeroed out when mask bit 0 is not set).

Operation (unmasked form; the masked forms guard the dst.fp16[0] assignment with IF k[0]):

dst.fp16[0] := GetNormalizedMantissaFP16(b.fp16[0], norm, sign)
dst[127:16] := a[127:16]
dst[MAX:128] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
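A hedged C sketch of mantissa extraction; it assumes these entries correspond to _mm512_getmant_ph / _mm_getmant_sh with the usual norm/sign enum values:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512h x = _mm512_set1_ph((_Float16)6.0f);
    /* map each |x| into [1, 2) and keep the source sign: 6.0 -> 1.5 */
    __m512h m = _mm512_getmant_ph(x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
    printf("%f\n", (double)_mm_cvtsh_h(_mm512_castph512_ph128(m)));
    return 0;
}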
Extract the reduced argument of packed half-precision (16-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note] Six variants are provided: with or without [sae_note], each either unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), or with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation (unmasked form; the masked forms guard each element assignment with IF k[i]):

DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) {
    m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved
    tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0])
    tmp[15:0] := src[15:0] - tmp[15:0]
    IF IsInf(tmp[15:0])
        tmp[15:0] := FP16(0.0)
    FI
    RETURN tmp[15:0]
}
FOR i := 0 to 31
    dst.fp16[i] := ReduceArgumentFP16(a.fp16[i], imm8)
ENDFOR
dst[MAX:512] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
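A small C sketch showing that the reduced argument is the rounding residue; it assumes the entry corresponds to _mm512_reduce_ph:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512h x = _mm512_set1_ph((_Float16)2.71875f);
    /* keep M = 2 fraction bits, round to nearest: RoundScale(x) = 2.75,
       so the reduced argument is 2.71875 - 2.75 = -0.03125 */
    __m512h r = _mm512_reduce_ph(x, (2 << 4) | 0);
    printf("%f\n", (double)_mm_cvtsh_h(_mm512_castph512_ph128(r)));
    return 0;
}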
Scale the packed half-precision (16-bit) floating-point elements in "a" using values from "b", and store the results in "dst". Six variants are provided: with or without [round_note], each either unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), or with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation (unmasked form over all 32 elements of the 512-bit vector; the masked forms guard each element assignment with IF k[i]):

DEFINE ScaleFP16(src1, src2) {
    denormal1 := (src1.exp == 0) and (src1.fraction != 0)
    denormal2 := (src2.exp == 0) and (src2.fraction != 0)
    tmp1 := src1
    tmp2 := src2
    IF MXCSR.DAZ
        IF denormal1
            tmp1 := 0
        FI
        IF denormal2
            tmp2 := 0
        FI
    FI
    RETURN tmp1 * POW(2.0, FLOOR(tmp2))
}
FOR i := 0 to 31
    dst.fp16[i] := ScaleFP16(a.fp16[i], b.fp16[i])
ENDFOR
dst[MAX:512] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
Scale the lower half-precision (16-bit) floating-point element in "a" using the value in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". Six variants are provided: with or without [round_note], each either unmasked, with writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or with zeromask "k" (the lower element is zeroed out when mask bit 0 is not set).

Operation (unmasked form; the masked forms guard the dst.fp16[0] assignment with IF k[0]; ScaleFP16 is defined as above):

dst.fp16[0] := ScaleFP16(a.fp16[0], b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
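In C, this behaves like a vectorized ldexp. A hedged sketch, assuming the packed entry corresponds to _mm512_scalef_ph:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512h a = _mm512_set1_ph((_Float16)3.0f);
    __m512h b = _mm512_set1_ph((_Float16)4.0f);
    __m512h r = _mm512_scalef_ph(a, b);   /* 3.0 * 2^floor(4.0) = 48.0 per lane */
    printf("%f\n", (double)_mm_cvtsh_h(_mm512_castph512_ph128(r)));
    return 0;
}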
Test packed half-precision (16-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k". [fpclass_note] A second variant additionally applies zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).

Operation (unmasked form; the masked form guards each bit assignment with IF k1[i], substituting 0 when the bit is clear):

FOR i := 0 to 31
    k[i] := CheckFPClass_FP16(a.fp16[i], imm8[7:0])
ENDFOR
k[MAX:32] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
Test the lower half-precision (16-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k". [fpclass_note] A second variant additionally applies zeromask "k1" (the result bit is zeroed out when mask bit 0 is not set).

Operation (unmasked form; the masked form guards the k[0] assignment with IF k1[0]):

k[0] := CheckFPClass_FP16(a.fp16[0], imm8[7:0])
k[MAX:1] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
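A hedged C sketch of class testing; it assumes the packed entry corresponds to _mm512_fpclass_ph_mask and uses the category encoding summarized in [fpclass_note] (0x08 = positive infinity, 0x10 = negative infinity):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512h x = _mm512_set1_ph((_Float16)1.0f);
    __mmask32 inf = _mm512_fpclass_ph_mask(x, 0x08 | 0x10);  /* test for +/-Inf */
    printf("%u\n", (unsigned)inf);   /* 0: no lane is infinite */
    return 0;
}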
Shuffle half-precision (16-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".

FOR j := 0 to 31
    i := j*16
    off := idx[i+4:i]
    dst.fp16[j] := idx[i+5] ? b.fp16[off] : a.fp16[off]
ENDFOR
dst[MAX:512] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous

Blend packed half-precision (16-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".

FOR j := 0 to 31
    IF k[j]
        dst.fp16[j] := b.fp16[j]
    ELSE
        dst.fp16[j] := a.fp16[j]
    FI
ENDFOR
dst[MAX:512] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous

Shuffle half-precision (16-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst".

FOR j := 0 to 31
    i := j*16
    id := idx[i+4:i]
    dst.fp16[j] := a.fp16[id]
ENDFOR
dst[MAX:512] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Miscellaneous
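A hedged C sketch of a cross-lane permute; it assumes the single-source entry corresponds to _mm512_permutexvar_ph:

#include <immintrin.h>

/* Reverse the 32 fp16 lanes of a vector. */
__m512h reverse_ph(__m512h x) {
    short idx[32];
    for (int j = 0; j < 32; ++j)
        idx[j] = (short)(31 - j);            /* lane j reads source lane 31-j */
    __m512i vidx = _mm512_loadu_si512(idx);
    return _mm512_permutexvar_ph(vidx, x);
}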
Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. Three variants are provided: unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), or with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation (unmasked form; the masked forms guard each element assignment with IF k[i]):

FOR i := 0 to 31
    dst.fp16[i] := (1.0 / SQRT(a.fp16[i]))
ENDFOR
dst[MAX:512] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Elementary Math Functions
Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. Three variants are provided: unmasked, with writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or with zeromask "k" (the lower element is zeroed out when mask bit 0 is not set).

Operation (unmasked form; the masked forms guard the dst.fp16[0] assignment with IF k[0]):

dst.fp16[0] := (1.0 / SQRT(b.fp16[0]))
dst[127:16] := a[127:16]
dst[MAX:128] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Elementary Math Functions
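A short C sketch; it assumes the packed entry corresponds to _mm512_rsqrt_ph:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512h x = _mm512_set1_ph((_Float16)4.0f);
    __m512h r = _mm512_rsqrt_ph(x);   /* ~0.5 per lane, relative error < 1.5*2^-12 */
    printf("%f\n", (double)_mm_cvtsh_h(_mm512_castph512_ph128(r)));
    return 0;
}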
Compute the square root of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". Six variants are provided: with or without [round_note], each either unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), or with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation (unmasked form; the masked forms guard each element assignment with IF k[i]):

FOR i := 0 to 31
    dst.fp16[i] := SQRT(a.fp16[i])
ENDFOR
dst[MAX:512] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Elementary Math Functions
Compute the square root of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". Six variants are provided: with or without [round_note], each either unmasked, with writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or with zeromask "k" (the lower element is zeroed out when mask bit 0 is not set).

Operation (unmasked form; the masked forms guard the dst.fp16[0] assignment with IF k[0]):

dst.fp16[0] := SQRT(b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Elementary Math Functions
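A hedged C sketch of the masked form; it assumes the writemask entry corresponds to _mm512_mask_sqrt_ph:

#include <immintrin.h>

__m512h sqrt_low_half(__m512h x) {
    __m512h src = _mm512_setzero_ph();
    __mmask32 k = 0x0000FFFF;               /* operate on lanes 0..15 only */
    /* lanes 16..31 are copied from src (here: zero) instead of computed */
    return _mm512_mask_sqrt_ph(src, k, x);
}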
Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12. Three variants are provided: unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), or with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation (unmasked form; the masked forms guard each element assignment with IF k[i]):

FOR i := 0 to 31
    dst.fp16[i] := (1.0 / a.fp16[i])
ENDFOR
dst[MAX:512] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Elementary Math Functions
Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 7 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12. Three variants are provided: unmasked, with writemask "k" (the lower element is copied from "src" when mask bit 0 is not set), or with zeromask "k" (the lower element is zeroed out when mask bit 0 is not set).

Operation (unmasked form; the masked forms guard the dst.fp16[0] assignment with IF k[0]):

dst.fp16[0] := (1.0 / b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Elementary Math Functions
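Since the approximation carries a bounded relative error, a single Newton-Raphson step can refine it. A hedged C sketch, assuming the packed entry corresponds to _mm512_rcp_ph:

#include <immintrin.h>

__m512h refined_rcp(__m512h a) {
    __m512h r0 = _mm512_rcp_ph(a);                /* error < 1.5*2^-12 */
    __m512h two = _mm512_set1_ph((_Float16)2.0f);
    /* r1 = r0 * (2 - a*r0); fnmadd computes -(a*r0) + 2 */
    return _mm512_mul_ph(r0, _mm512_fnmadd_ph(a, r0, two));
}

With fp16's 11-bit significand the initial error is already close to one ulp, so the extra step mainly illustrates the pattern used for the wider types.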
Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values. Provided for 8, 16, and 32 elements (128-, 256-, and 512-bit vectors):

dst.fp16[0] := e0, dst.fp16[1] := e1, ..., dst.fp16[N-1] := e(N-1), with N = 8, 16, or 32

Set packed half-precision (16-bit) floating-point elements in "dst" with the supplied values in reverse order, for the same three widths:

dst.fp16[0] := e(N-1), dst.fp16[1] := e(N-2), ..., dst.fp16[N-1] := e0, with N = 8, 16, or 32

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Set
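A brief C sketch of the argument ordering; it assumes these entries correspond to _mm_set_ph / _mm_setr_ph and their wider counterparts:

#include <immintrin.h>

void set_vs_setr(void) {
    /* set lists arguments from the highest element down, so element 0
       receives the last argument ... */
    __m128h s = _mm_set_ph((_Float16)7.0f, (_Float16)6.0f, (_Float16)5.0f,
                           (_Float16)4.0f, (_Float16)3.0f, (_Float16)2.0f,
                           (_Float16)1.0f, (_Float16)0.0f);
    /* ... while setr lists them in memory order; s and r are equal here */
    __m128h r = _mm_setr_ph((_Float16)0.0f, (_Float16)1.0f, (_Float16)2.0f,
                            (_Float16)3.0f, (_Float16)4.0f, (_Float16)5.0f,
                            (_Float16)6.0f, (_Float16)7.0f);
    (void)s; (void)r;
}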
Broadcast half-precision (16-bit) floating-point value "a" to all elements of "dst". Provided for 8-, 16-, and 32-element (128-, 256-, and 512-bit) vectors:

FOR i := 0 to N-1    // N = 8, 16, or 32
    dst.fp16[i] := a[15:0]
ENDFOR
dst[MAX:N*16] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Set
Broadcast half-precision (16-bit) complex floating-point value "a" to all elements of "dst". Provided for 4-, 8-, and 16-complex-element (128-, 256-, and 512-bit) vectors:

FOR i := 0 to N-1    // N = 4, 8, or 16 complex pairs
    dst.fp16[2*i+0] := a[15:0]
    dst.fp16[2*i+1] := a[31:16]
ENDFOR
dst[MAX:N*32] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Set
Copy half-precision (16-bit) floating-point element "a" to the lower element of "dst", and zero the upper 7 elements.

dst.fp16[0] := a[15:0]
dst[127:16] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Set

Return vector of type __m512h with all elements set to zero.

dst[MAX:0] := 0

CPUID: AVX512_FP16 | Header: immintrin.h | Category: Set
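A tiny C sketch of the common initializers; the names _mm512_set1_ph, _mm_set_sh, and _mm512_setzero_ph are assumed to be the intrinsics behind these entries:

#include <immintrin.h>

void initializers(void) {
    __m512h ones = _mm512_set1_ph((_Float16)1.0f);  /* broadcast one scalar */
    __m128h low  = _mm_set_sh((_Float16)5.0f);      /* 5.0 in lane 0, rest zeroed */
    __m512h zero = _mm512_setzero_ph();
    (void)ones; (void)low; (void)zero;
}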
These cast intrinsics reinterpret a vector register as another vector type. Each is only used for compilation and does not generate any instructions, thus it has zero latency. The following casts are provided (CPUID: AVX512_FP16, Header: immintrin.h, Category: Cast):

- "__m128h"/"__m256h"/"__m512h" to "__m128"/"__m256"/"__m512", and the reverse
- "__m128h"/"__m256h"/"__m512h" to "__m128d"/"__m256d"/"__m512d", and the reverse
- "__m128h"/"__m256h"/"__m512h" to "__m128i"/"__m256i"/"__m512i", and the reverse
- truncating casts: "__m256h" or "__m512h" to "__m128h", and "__m512h" to "__m256h"
- widening casts: "__m128h" to "__m256h" or "__m512h", and "__m256h" to "__m512h"
- zero-extending casts: "__m128h" to "__m256h" (the upper 128 bits of the result are zeroed), "__m128h" to "__m512h" (the upper 384 bits are zeroed), and "__m256h" to "__m512h" (the upper 256 bits are zeroed)

Return vector of type __m512h with undefined elements. (CPUID: AVX512_FP16, Header: immintrin.h, Category: General Support)
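A hedged C sketch of bit-preserving casts; the names are assumed to be the intrinsics behind these entries:

#include <immintrin.h>

void casts(void) {
    __m512h h    = _mm512_set1_ph((_Float16)1.0f);
    __m512i bits = _mm512_castph_si512(h);       /* same 512 bits, viewed as ints */
    __m512h back = _mm512_castsi512_ph(bits);    /* round-trips with no codegen */
    __m128h low  = _mm512_castph512_ph128(h);    /* truncating: keeps low 128 bits */
    __m512h wide = _mm512_zextph128_ph512(low);  /* upper 384 bits zeroed */
    (void)back; (void)wide;
}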
For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst". Provided for 256-bit (4 elements) and 128-bit (2 elements) vectors, each either unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), or with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation (256-bit unmasked form; the 128-bit forms loop i over 0 to 1 and zero dst[MAX:128], and the masked forms guard each byte store with IF k[i*8+j]):

FOR i := 0 to 3
    q := i * 64
    FOR j := 0 to 7
        tmp8 := 0
        ctrl := a[q+j*8+7:q+j*8] & 63
        FOR l := 0 to 7
            tmp8[l] := b[q+((ctrl+l) & 63)]
        ENDFOR
        dst[q+j*8+7:q+j*8] := tmp8[7:0]
    ENDFOR
ENDFOR
dst[MAX:256] := 0

CPUID: AVX512_VBMI, AVX512VL | Header: immintrin.h | Category: Bit Manipulation
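A hedged C sketch of byte-granular extraction; it assumes the 256-bit unmasked entry corresponds to _mm256_multishift_epi64_epi8, with the control vector as the first argument:

#include <immintrin.h>

__m256i extract_shifted_bytes(void) {
    __m256i data = _mm256_set1_epi64x(0x0123456789ABCDEFLL);
    /* each control byte gives the starting bit of one output byte;
       using 4 everywhere selects bits [11:4] (mod 64) of each qword */
    __m256i ctrl = _mm256_set1_epi64x(0x0404040404040404LL);
    return _mm256_multishift_epi64_epi8(ctrl, data);   /* every byte = 0xDE */
}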
Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". Provided for 256-bit (32 elements, 5-bit indices) and 128-bit (16 elements, 4-bit indices) vectors, each either unmasked, with writemask "k" (elements are copied from "src" when the corresponding mask bit is not set), or with zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).

Operation (256-bit unmasked form; the 128-bit forms loop j over 0 to 15 with id := idx[i+3:i]*8 and zero dst[MAX:128], and the masked forms guard each byte assignment with IF k[j]):

FOR j := 0 to 31
    i := j*8
    id := idx[i+4:i]*8
    dst[i+7:i] := a[id+7:id]
ENDFOR
dst[MAX:256] := 0

CPUID: AVX512_VBMI, AVX512VL | Header: immintrin.h | Category: Swizzle
- - - - - - Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 31 - i := j*8 - off := 8*idx[i+4:i] - dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - off := 8*idx[i+4:i] - dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] - ELSE - dst[i+7:i] := a[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - off := 8*idx[i+4:i] - dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] - ELSE - dst[i+7:i] := idx[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*8 - IF k[j] - off := 8*idx[i+4:i] - dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - - AVX512_VBMI - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 15 - i := j*8 - off := 8*idx[i+3:i] - dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - off := 8*idx[i+3:i] - dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] - ELSE - dst[i+7:i] := a[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - off := 8*idx[i+3:i] - dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] - ELSE - dst[i+7:i] := idx[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*8 - IF k[j] - off := 8*idx[i+3:i] - dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - - AVX512_VBMI - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - - For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst". - -FOR i := 0 to 7 - q := i * 64 - FOR j := 0 to 7 - tmp8 := 0 - ctrl := a[q+j*8+7:q+j*8] & 63 - FOR l := 0 to 7 - tmp8[l] := b[q+((ctrl+l) & 63)] - ENDFOR - dst[q+j*8+7:q+j*8] := tmp8[7:0] - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI -
immintrin.h
- Bit Manipulation -
- - - - - - - For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR i := 0 to 7 - q := i * 64 - FOR j := 0 to 7 - tmp8 := 0 - ctrl := a[q+j*8+7:q+j*8] & 63 - FOR l := 0 to 7 - tmp8[l] := b[q+((ctrl+l) & 63)] - ENDFOR - IF k[i*8+j] - dst[q+j*8+7:q+j*8] := tmp8[7:0] - ELSE - dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8] - FI - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI -
immintrin.h
- Bit Manipulation -
- - - - - - For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR i := 0 to 7 - q := i * 64 - FOR j := 0 to 7 - tmp8 := 0 - ctrl := a[q+j*8+7:q+j*8] & 63 - FOR l := 0 to 7 - tmp8[l] := b[q+((ctrl+l) & 63)] - ENDFOR - IF k[i*8+j] - dst[q+j*8+7:q+j*8] := tmp8[7:0] - ELSE - dst[q+j*8+7:q+j*8] := 0 - FI - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI -
immintrin.h
- Bit Manipulation -
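The multishift pseudocode above is easier to grasp with a concrete control pattern. Below is a minimal C sketch, assuming the matching immintrin.h intrinsic is _mm512_multishift_epi64_epi8 and an AVX512_VBMI-capable toolchain (e.g. gcc -O2 -mavx512vbmi): with byte-granular shift controls 4, 12, 20, ..., 60, result byte j picks up bits [8*j+11 : 8*j+4] (mod 64) of the source qword, which amounts to rotating every 64-bit lane right by 4 bits.

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* Control byte j of each qword = bit offset 8*j + 4 (taken mod 64). */
    __m512i ctrl = _mm512_set1_epi64(0x3C342C241C140C04LL);
    __m512i data = _mm512_set1_epi64(0x0123456789ABCDEFLL);
    __m512i r    = _mm512_multishift_epi64_epi8(ctrl, data);

    uint64_t out[8];
    _mm512_storeu_si512(out, r);
    /* Each lane is the source rotated right by 4 bits. */
    printf("%016llx\n", (unsigned long long)out[0]); /* f0123456789abcde */
    return 0;
}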
- - - - - Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - id := idx[i+5:i]*8 - dst[i+7:i] := a[id+7:id] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - id := idx[i+5:i]*8 - IF k[j] - dst[i+7:i] := a[id+7:id] - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI -
immintrin.h
- Swizzle -
- - - - - - Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - id := idx[i+5:i]*8 - IF k[j] - dst[i+7:i] := a[id+7:id] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI -
immintrin.h
- Swizzle -
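Because the index vector is itself a full vector operand, the byte permutes above can realize any byte shuffle of a 512-bit register in one step. A minimal sketch, assuming the matching intrinsic is _mm512_permutexvar_epi8 (AVX512_VBMI, e.g. -mavx512vbmi), reversing all 64 bytes:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned char src[64], rev[64], out[64];
    for (int i = 0; i < 64; i++) {
        src[i] = (unsigned char)i;        /* 0, 1, ..., 63 */
        rev[i] = (unsigned char)(63 - i); /* index j reads byte 63-j */
    }
    __m512i idx = _mm512_loadu_si512(rev);
    __m512i v   = _mm512_loadu_si512(src);
    __m512i r   = _mm512_permutexvar_epi8(idx, v); /* dst[j] = v[idx[j]] */
    _mm512_storeu_si512(out, r);
    printf("%d ... %d\n", out[0], out[63]); /* 63 ... 0 */
    return 0;
}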
- - - - - - Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst". - -FOR j := 0 to 63 - i := j*8 - off := 8*idx[i+5:i] - dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - off := 8*idx[i+5:i] - dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] - ELSE - dst[i+7:i] := a[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - off := 8*idx[i+5:i] - dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] - ELSE - dst[i+7:i] := idx[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI -
immintrin.h
- Swizzle -
- - - - - - - Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 63 - i := j*8 - IF k[j] - off := 8*idx[i+5:i] - dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off] - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - - AVX512_VBMI -
immintrin.h
- Swizzle -
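The two-source variants above effectively index a 128-byte table split across two registers, with the selector bit (bit 6 of each index in the 512-bit form) choosing the half. A minimal sketch, assuming the matching intrinsic is _mm512_permutex2var_epi8 (AVX512_VBMI):

#include <immintrin.h>

/* Map every byte of v through a 128-entry table held in lo/hi.
 * Index bits [5:0] select the byte within a half and bit 6 selects
 * lo vs. hi, matching the pseudocode above; higher bits are ignored. */
static inline __m512i lut128_bytes(__m512i v, __m512i lo, __m512i hi) {
    return _mm512_permutex2var_epi8(lo, v, hi);
}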
- - - - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
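Since the concatenation ((b << 64) | a) >> c discards no bits when b and a are the same register, the variable funnel shift doubles as a per-lane variable rotate. A minimal sketch, assuming the matching intrinsic is _mm_shrdv_epi64 (AVX512_VBMI2 + AVX512VL, e.g. -mavx512vbmi2 -mavx512vl):

#include <immintrin.h>

/* Per-lane variable rotate right: ((a << 64) | a) >> (c & 63). */
static inline __m128i rotr_epi64(__m128i a, __m128i counts) {
    return _mm_shrdv_epi64(a, a, counts);
}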
- - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
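The immediate forms are handy for reading a 64-bit field at a fixed, unaligned bit offset out of a 128-bit quantity whose halves sit in two vectors. A minimal sketch, assuming the matching intrinsic is _mm_shrdi_epi64 (AVX512_VBMI2 + AVX512VL); the bit offset 13 is an arbitrary illustration:

#include <immintrin.h>

/* In each lane, treat hi:lo as a 128-bit value and extract the
 * 64-bit field starting at bit 13: ((hi << 64) | lo) >> 13. */
static inline __m128i field_at_bit13(__m128i lo, __m128i hi) {
    return _mm_shrdi_epi64(lo, hi, 13);
}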
- - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst". - -FOR j := 0 to 15 - i := j*16 - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst". - -FOR j := 0 to 3 - i := j*64 - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) - dst[i+63:i] := tmp[127:64] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst". - -FOR j := 0 to 1 - i := j*64 - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) - dst[i+63:i] := tmp[127:64] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
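Mirroring the right-shift case, feeding the same register as both concatenation operands turns the variable left funnel shift into a per-lane variable rotate left. A minimal sketch, assuming the matching intrinsic is _mm_shldv_epi64 (AVX512_VBMI2 + AVX512VL):

#include <immintrin.h>

/* Per-lane variable rotate left: upper half of ((a << 64) | a) << c. */
static inline __m128i rotl_epi64(__m128i a, __m128i counts) {
    return _mm_shldv_epi64(a, a, counts);
}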
- - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst". - -FOR j := 0 to 7 - i := j*32 - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) - dst[i+31:i] := tmp[63:32] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst". - -FOR j := 0 to 3 - i := j*32 - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) - dst[i+31:i] := tmp[63:32] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst". - -FOR j := 0 to 15 - i := j*16 - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) - dst[i+15:i] := tmp[31:16] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst". - -FOR j := 0 to 7 - i := j*16 - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) - dst[i+15:i] := tmp[31:16] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst". - -FOR j := 0 to 3 - i := j*64 - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] - dst[i+63:i] := tmp[127:64] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 1 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst". - -FOR j := 0 to 1 - i := j*64 - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] - dst[i+63:i] := tmp[127:64] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst". - -FOR j := 0 to 7 - i := j*32 - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] - dst[i+31:i] := tmp[63:32] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst". - -FOR j := 0 to 3 - i := j*32 - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] - dst[i+31:i] := tmp[63:32] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst". - -FOR j := 0 to 15 - i := j*16 - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] - dst[i+15:i] := tmp[31:16] -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst". - -FOR j := 0 to 7 - i := j*16 - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] - dst[i+15:i] := tmp[31:16] -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Shift -
- - Swizzle - - - - Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] - m := m + 16 - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Load -
- - Swizzle - - - - - Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] - m := m + 16 - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Load -
- - Swizzle - - - - Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] - m := m + 16 - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Load -
- - Swizzle - - - - - Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] - m := m + 16 - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Load -
- - Swizzle - - - - Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] - m := m + 8 - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Load -
- - Swizzle - - - - - Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] - m := m + 8 - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Load -
- - Swizzle - - - - Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] - m := m + 8 - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Load -
- - Swizzle - - - - - Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] - m := m + 8 - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Load -
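Expand loads read only popcount(k) contiguous elements and scatter them into the active lanes, which makes zero-gap insertion a one-instruction job. A minimal sketch, assuming the matching intrinsic is _mm_maskz_expandloadu_epi8 (AVX512_VBMI2 + AVX512VL): spread 8 consecutive bytes into the even positions of a 128-bit vector, zeroing the odd ones.

#include <immintrin.h>

/* Loads exactly 8 bytes from p (the popcount of the mask) and places
 * them at byte positions 0, 2, 4, ..., 14; odd positions become 0. */
static inline __m128i spread_even_bytes(const void *p) {
    return _mm_maskz_expandloadu_epi8((__mmask16)0x5555, p);
}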
- - - - - Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := a[m+15:m] - m := m + 16 - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[i+15:i] := a[m+15:m] - m := m + 16 - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := a[m+15:m] - m := m + 16 - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[i+15:i] := a[m+15:m] - m := m + 16 - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := a[m+7:m] - m := m + 8 - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[i+7:i] := a[m+7:m] - m := m + 8 - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := a[m+7:m] - m := m + 8 - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[i+7:i] := a[m+7:m] - m := m + 8 - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
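The register form performs the same gap insertion without touching memory; with mask 0x5555 the active lanes are the even byte positions, so the low eight bytes end up zero-extended into eight 16-bit lanes. A minimal sketch, assuming the matching intrinsic is _mm_maskz_expand_epi8 (AVX512_VBMI2 + AVX512VL):

#include <immintrin.h>

/* Equivalent to zero-extending the low 8 bytes of a to 16-bit lanes:
 * source bytes land at positions 0, 2, ..., 14, the rest are zeroed. */
static inline __m128i widen_lo8_bytes(__m128i a) {
    return _mm_maskz_expand_epi8((__mmask16)0x5555, a);
}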
- - - - - Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 16 -m := 0 -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[m+size-1:m] := a[i+15:i] - m := m + size - FI -ENDFOR -dst[255:m] := 0 -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 16 -m := 0 -FOR j := 0 to 15 - i := j*16 - IF k[j] - dst[m+size-1:m] := a[i+15:i] - m := m + size - FI -ENDFOR -dst[255:m] := src[255:m] -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 16 -m := 0 -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[m+size-1:m] := a[i+15:i] - m := m + size - FI -ENDFOR -dst[127:m] := 0 -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 16 -m := 0 -FOR j := 0 to 7 - i := j*16 - IF k[j] - dst[m+size-1:m] := a[i+15:i] - m := m + size - FI -ENDFOR -dst[127:m] := src[127:m] -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 8 -m := 0 -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[m+size-1:m] := a[i+7:i] - m := m + size - FI -ENDFOR -dst[255:m] := 0 -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 8 -m := 0 -FOR j := 0 to 31 - i := j*8 - IF k[j] - dst[m+size-1:m] := a[i+7:i] - m := m + size - FI -ENDFOR -dst[255:m] := src[255:m] -dst[MAX:256] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 8 -m := 0 -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[m+size-1:m] := a[i+7:i] - m := m + size - FI -ENDFOR -dst[127:m] := 0 -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
- - - - - - Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 8 -m := 0 -FOR j := 0 to 15 - i := j*8 - IF k[j] - dst[m+size-1:m] := a[i+7:i] - m := m + size - FI -ENDFOR -dst[127:m] := src[127:m] -dst[MAX:128] := 0 - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Swizzle -
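Pairing a compare mask with the compress forms gives branch-free left-packing (stream compaction) in registers. A minimal sketch, assuming the matching intrinsics are _mm_test_epi8_mask (AVX512BW + AVX512VL) and _mm_maskz_compress_epi8 (AVX512_VBMI2 + AVX512VL), plus the GCC/Clang __builtin_popcount:

#include <immintrin.h>

/* Left-pack the non-zero bytes of v; *n receives how many survived. */
static inline __m128i keep_nonzero_bytes(__m128i v, int *n) {
    __mmask16 m = _mm_test_epi8_mask(v, v); /* bit j set iff byte j != 0 */
    *n = __builtin_popcount((unsigned)m);
    return _mm_maskz_compress_epi8(m, v);
}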
- - Swizzle - - - - - Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -size := 16 -m := base_addr -FOR j := 0 to 15 - i := j*16 - IF k[j] - MEM[m+size-1:m] := a[i+15:i] - m := m + size - FI -ENDFOR - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Store -
- - Swizzle - - - - - Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -size := 16 -m := base_addr -FOR j := 0 to 7 - i := j*16 - IF k[j] - MEM[m+size-1:m] := a[i+15:i] - m := m + size - FI -ENDFOR - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Store -
- - Swizzle - - - - - Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -size := 8 -m := base_addr -FOR j := 0 to 31 - i := j*8 - IF k[j] - MEM[m+size-1:m] := a[i+7:i] - m := m + size - FI -ENDFOR - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Store -
- - Swizzle - - - - - Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -size := 8 -m := base_addr -FOR j := 0 to 15 - i := j*8 - IF k[j] - MEM[m+size-1:m] := a[i+7:i] - m := m + size - FI -ENDFOR - - - AVX512_VBMI2 - AVX512VL -
immintrin.h
- Store -
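The compressstoreu variants write only the selected elements, contiguously and unaligned, which makes them a natural building block for branch-free filtering: store under mask, then advance the output cursor by the mask's popcount. A hedged sketch, assuming the 256-bit byte form above is _mm256_mask_compressstoreu_epi8 and that the caller computed the mask elsewhere (AVX512_VBMI2 + AVX512VL):

#include <immintrin.h>

// Append the bytes of v selected by k to dst; only popcount(k) bytes
// are written. Returns the advanced output cursor.
static unsigned char *append_selected(unsigned char *dst, __m256i v, __mmask32 k) {
    _mm256_mask_compressstoreu_epi8(dst, k, v);
    return dst + _mm_popcnt_u32((unsigned)k);
}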
- - - - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> (c[i+63:i] & 63) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> (c[i+31:i] & 31) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> (c[i+15:i] & 15) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
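All of the variable forms above compute a per-element funnel shift: each destination lane is the low half of the double-width value (b:a) shifted right by a per-lane count taken modulo the lane width. A useful corollary is that passing the same vector for both inputs yields a variable rotate right. A sketch, assuming the plain 512-bit 32-bit form above is _mm512_shrdv_epi32 (AVX512_VBMI2):

#include <immintrin.h>

// Rotate each 32-bit lane of a right by the per-lane count in n.
// shrdv computes the low half of ((b << 32) | a) >> (n & 31);
// with b == a that is exactly a rotate right.
static __m512i rotr32_var(__m512i a, __m512i n) {
    return _mm512_shrdv_epi32(a, a, n);
}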
- - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst". - -FOR j := 0 to 7 - i := j*64 - dst[i+63:i] := ((b[i+63:i] << 64)[127:0] | a[i+63:i]) >> imm8[5:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst". - -FOR j := 0 to 15 - i := j*32 - dst[i+31:i] := ((b[i+31:i] << 32)[63:0] | a[i+31:i]) >> imm8[4:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst". - -FOR j := 0 to 31 - i := j*16 - dst[i+15:i] := ((b[i+15:i] << 16)[31:0] | a[i+15:i]) >> imm8[3:0] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst". - -FOR j := 0 to 7 - i := j*64 - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << (c[i+63:i] & 63) - dst[i+63:i] := tmp[127:64] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst". - -FOR j := 0 to 15 - i := j*32 - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << (c[i+31:i] & 31) - dst[i+31:i] := tmp[63:32] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst". - -FOR j := 0 to 31 - i := j*16 - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << (c[i+15:i] & 15) - dst[i+15:i] := tmp[31:16] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - i := j*64 - IF k[j] - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] - dst[i+63:i] := tmp[127:64] - ELSE - dst[i+63:i] := src[i+63:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst". - -FOR j := 0 to 7 - i := j*64 - tmp[127:0] := ((a[i+63:i] << 64)[127:0] | b[i+63:i]) << imm8[5:0] - dst[i+63:i] := tmp[127:64] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - i := j*32 - IF k[j] - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] - dst[i+31:i] := tmp[63:32] - ELSE - dst[i+31:i] := src[i+31:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst". - -FOR j := 0 to 15 - i := j*32 - tmp[63:0] := ((a[i+31:i] << 32)[63:0] | b[i+31:i]) << imm8[4:0] - dst[i+31:i] := tmp[63:32] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 31 - i := j*16 - IF k[j] - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] - dst[i+15:i] := tmp[31:16] - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
- - - - - - Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst". - -FOR j := 0 to 31 - i := j*16 - tmp[31:0] := ((a[i+15:i] << 16)[31:0] | b[i+15:i]) << imm8[3:0] - dst[i+15:i] := tmp[31:16] -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Shift -
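The immediate forms round out the funnel-shift family, and they double as rotates at element widths where AVX-512 has no native rotate instruction: with both operands equal, a left funnel shift by imm8 is a rotate left. A sketch, assuming the 16-bit immediate form above is _mm512_shldi_epi16 (AVX512_VBMI2; AVX512F provides native rotates only for 32- and 64-bit lanes):

#include <immintrin.h>

// Rotate each 16-bit lane of a left by 3 bits.
// shldi takes the upper half of ((a << 16) | b) << imm8; with b == a
// that upper half is (a << 3) | (a >> 13), a 16-bit rotate left.
static __m512i rotl16_by3(__m512i a) {
    return _mm512_shldi_epi16(a, a, 3);
}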
- - Swizzle - - - - Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] - m := m + 16 - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Load -
- - Swizzle - - - - - Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m] - m := m + 16 - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Load -
- - Swizzle - - - - Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] - m := m + 8 - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Load -
- - Swizzle - - - - - Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m] - m := m + 8 - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Load -
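Expand loads are the memory-side inverse of the compress stores earlier in this section: popcount(k) contiguous elements are read from "mem_addr" and scattered into the lanes whose mask bit is set, so a compress store followed by an expand load with the same mask round-trips a sparse vector through a dense buffer. A sketch, assuming the zeromask 16-bit form above is _mm512_maskz_expandloadu_epi16 (AVX512_VBMI2):

#include <immintrin.h>

// Re-inflate a densely packed buffer: lane j of the result receives the
// next packed element when bit j of k is set, and zero otherwise.
static __m512i inflate_u16(const void *packed, __mmask32 k) {
    return _mm512_maskz_expandloadu_epi16(k, packed);
}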
- - - - - Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[m+15:m] - m := m + 16 - ELSE - dst[i+15:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Swizzle -
- - - - - - Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[i+15:i] := a[m+15:m] - m := m + 16 - ELSE - dst[i+15:i] := src[i+15:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Swizzle -
- - - - - Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[m+7:m] - m := m + 8 - ELSE - dst[i+7:i] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Swizzle -
- - - - - - Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -m := 0 -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[i+7:i] := a[m+7:m] - m := m + 8 - ELSE - dst[i+7:i] := src[i+7:i] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Swizzle -
- - - - - Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 16 -m := 0 -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[m+size-1:m] := a[i+15:i] - m := m + size - FI -ENDFOR -dst[511:m] := 0 -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Swizzle -
- - - - - - Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 16 -m := 0 -FOR j := 0 to 31 - i := j*16 - IF k[j] - dst[m+size-1:m] := a[i+15:i] - m := m + size - FI -ENDFOR -dst[511:m] := src[511:m] -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Swizzle -
- - - - - Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero. - -size := 8 -m := 0 -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[m+size-1:m] := a[i+7:i] - m := m + size - FI -ENDFOR -dst[511:m] := 0 -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Swizzle -
- - - - - - Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src". - -size := 8 -m := 0 -FOR j := 0 to 63 - i := j*8 - IF k[j] - dst[m+size-1:m] := a[i+7:i] - m := m + size - FI -ENDFOR -dst[511:m] := src[511:m] -dst[MAX:512] := 0 - - - AVX512_VBMI2 -
immintrin.h
- Swizzle -
- - Swizzle - - - - - Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -size := 16 -m := base_addr -FOR j := 0 to 31 - i := j*16 - IF k[j] - MEM[m+size-1:m] := a[i+15:i] - m := m + size - FI -ENDFOR - - - AVX512_VBMI2 -
immintrin.h
- Store -
- - Swizzle - - - - - Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr". - -size := 8 -m := base_addr -FOR j := 0 to 63 - i := j*8 - IF k[j] - MEM[m+size-1:m] := a[i+7:i] - m := m + size - FI -ENDFOR - - - AVX512_VBMI2 -
immintrin.h
- Store -
- - - - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
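The dpwssd family fuses what previously took a pmaddwd plus a separate add: multiply adjacent signed 16-bit pairs, sum each pair, and accumulate into the corresponding 32-bit lane; the saturating variants above clamp the final addition while the plain ones wrap. A sketch of one accumulation step, assuming the plain 256-bit form above is _mm256_dpwssd_epi32 (AVX512_VNNI + AVX512VL):

#include <immintrin.h>

// acc.dword[j] += a.word[2j] * b.word[2j] + a.word[2j+1] * b.word[2j+1]
static __m256i dot16_step(__m256i acc, __m256i a, __m256i b) {
    return _mm256_dpwssd_epi32(acc, a, b);
}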
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 7 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:256] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 3 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:128] := 0 - - - AVX512_VNNI - AVX512VL -
immintrin.h
- Arithmetic -
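Note the operand asymmetry in the byte forms: "a" supplies unsigned 8-bit values and "b" signed ones, with four byte products accumulated into each 32-bit lane. This is the primitive behind u8 x s8 quantized dot products; when both inputs are signed, the usual workaround is to bias one operand into unsigned range and correct afterwards. A sketch of one step, assuming the plain 256-bit form above is _mm256_dpbusd_epi32 (AVX512_VNNI + AVX512VL):

#include <immintrin.h>

// Accumulate a u8 x s8 dot product: four byte pairs per 32-bit lane.
static __m256i dot8_step(__m256i acc, __m256i a_u8, __m256i b_s8) {
    return _mm256_dpbusd_epi32(acc, a_u8, b_s8);
}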
- - - - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 15 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 15 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 15 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 - ELSE - dst.dword[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -FOR j := 0 to 15 - IF k[j] - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 - ELSE - dst.dword[j] := src.dword[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 15 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:512] := 0 - - - AVX512_VNNI -
immintrin.h
- Arithmetic -
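Putting the 512-bit forms to work: a complete dot product over int8 buffers keeps a vector of 32-bit partial sums and reduces it once at the end. A sketch, assuming the plain 512-bit byte form above is _mm512_dpbusd_epi32 and using the AVX512F sequence intrinsic _mm512_reduce_add_epi32 for the final horizontal sum; n is assumed to be a multiple of 64:

#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

// Dot product of n bytes (n % 64 == 0): u holds unsigned bytes, s signed.
static int32_t dot_u8s8(const uint8_t *u, const int8_t *s, size_t n) {
    __m512i acc = _mm512_setzero_si512();
    for (size_t i = 0; i < n; i += 64) {
        __m512i a = _mm512_loadu_si512(u + i);
        __m512i b = _mm512_loadu_si512(s + i);
        acc = _mm512_dpbusd_epi32(acc, a, b);  // 4 byte products per dword lane
    }
    return _mm512_reduce_add_epi32(acc);  // horizontal sum of 16 dwords
}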
- - - - - - - - - Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. - -MEM[k1+15:k1] := 0 -MEM[k2+15:k2] := 0 -FOR i := 0 TO 15 - FOR j := 0 TO 15 - match := (a.dword[i] == b.dword[j] ? 1 : 0) - MEM[k1+15:k1].bit[i] |= match - MEM[k2+15:k2].bit[j] |= match - ENDFOR -ENDFOR - - - AVX512_VP2INTERSECT - AVX512F -
immintrin.h
- Mask -
- - - - - - - Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. - -MEM[k1+7:k1] := 0 -MEM[k2+7:k2] := 0 -FOR i := 0 TO 7 - FOR j := 0 TO 7 - match := (a.qword[i] == b.qword[j] ? 1 : 0) - MEM[k1+7:k1].bit[i] |= match - MEM[k2+7:k2].bit[j] |= match - ENDFOR -ENDFOR - - - AVX512_VP2INTERSECT - AVX512F -
immintrin.h
- Mask -
- - - - - - - - - Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. - -MEM[k1+7:k1] := 0 -MEM[k2+7:k2] := 0 -FOR i := 0 TO 3 - FOR j := 0 TO 3 - match := (a.dword[i] == b.dword[j] ? 1 : 0) - MEM[k1+7:k1].bit[i] |= match - MEM[k2+7:k2].bit[j] |= match - ENDFOR -ENDFOR - - - AVX512_VP2INTERSECT - AVX512VL -
immintrin.h
- Mask -
- - - - - - - Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. - -MEM[k1+7:k1] := 0 -MEM[k2+7:k2] := 0 -FOR i := 0 TO 7 - FOR j := 0 TO 7 - match := (a.dword[i] == b.dword[j] ? 1 : 0) - MEM[k1+7:k1].bit[i] |= match - MEM[k2+7:k2].bit[j] |= match - ENDFOR -ENDFOR - - - AVX512_VP2INTERSECT - AVX512VL -
immintrin.h
- Mask -
- - - - - - - Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. - -MEM[k1+7:k1] := 0 -MEM[k2+7:k2] := 0 -FOR i := 0 TO 1 - FOR j := 0 TO 1 - match := (a.qword[i] == b.qword[j] ? 1 : 0) - MEM[k1+7:k1].bit[i] |= match - MEM[k2+7:k2].bit[j] |= match - ENDFOR -ENDFOR - - - AVX512_VP2INTERSECT - AVX512VL -
immintrin.h
- Mask -
- - - - - - - Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers. - -MEM[k1+7:k1] := 0 -MEM[k2+7:k2] := 0 -FOR i := 0 TO 3 - FOR j := 0 TO 3 - match := (a.qword[i] == b.qword[j] ? 1 : 0) - MEM[k1+7:k1].bit[i] |= match - MEM[k2+7:k2].bit[j] |= match - ENDFOR -ENDFOR - - - AVX512_VP2INTERSECT - AVX512VL -
immintrin.h
- Mask -
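Unlike the element-wise comparisons elsewhere in AVX-512, vp2intersect performs an all-pairs comparison: bit i of the first mask is set when a[i] matches any element of "b", and bit j of the second when b[j] matches any element of "a", which is why the intrinsics return two masks through pointers. A sketch, assuming the 512-bit 32-bit form above is _mm512_2intersect_epi32 (AVX512_VP2INTERSECT):

#include <immintrin.h>

// Count how many lanes of a have a match somewhere in b.
static int count_matches(__m512i a, __m512i b) {
    __mmask16 in_a, in_b;
    _mm512_2intersect_epi32(a, b, &in_a, &in_b);
    return _mm_popcnt_u32((unsigned)in_a);
}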
- - - - - Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". - - -FOR j := 0 to 3 - i := j*64 - tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) - dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) -ENDFOR -dst[MAX:256] := 0 - - - - - AVX_IFMA -
immintrin.h
- Arithmetic -
- - - Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". - - -FOR j := 0 to 3 - i := j*64 - tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) - dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) -ENDFOR -dst[MAX:256] := 0 - - - - - AVX_IFMA -
immintrin.h
- Arithmetic -
- - - Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". - - -FOR j := 0 to 1 - i := j*64 - tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) - dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) -ENDFOR -dst[MAX:128] := 0 - - - - - AVX_IFMA -
immintrin.h
- Arithmetic -
- - - Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". - - -FOR j := 0 to 1 - i := j*64 - tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) - dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) -ENDFOR -dst[MAX:128] := 0 - - - - - AVX_IFMA -
immintrin.h
- Arithmetic -
- - - Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". - - -FOR j := 0 to 3 - i := j*64 - tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) - dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) -ENDFOR -dst[MAX:256] := 0 - - - - - AVX_IFMA -
immintrin.h
- Arithmetic -
- - - Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". - - -FOR j := 0 to 3 - i := j*64 - tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) - dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) -ENDFOR -dst[MAX:256] := 0 - - - - - AVX_IFMA -
immintrin.h
- Arithmetic -
- - - Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". - - -FOR j := 0 to 1 - i := j*64 - tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) - dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) -ENDFOR -dst[MAX:128] := 0 - - - - - AVX_IFMA -
immintrin.h
- Arithmetic -
- - - Multiply packed unsigned 52-bit integers in each 64-bit element of "__Y" and "__Z" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "__X", and store the results in "dst". - - -FOR j := 0 to 1 - i := j*64 - tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) - dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) -ENDFOR -dst[MAX:128] := 0 - - - - - AVX_IFMA -
immintrin.h
- Arithmetic -
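Each AVX-IFMA lane above multiplies two 52-bit operands into a 104-bit product and accumulates one half of it. A per-lane sketch in Rust (function names are mine; `u128` stands in for the 104-bit intermediate):

```rust
const MASK52: u64 = (1 << 52) - 1;

// Low half: add bits 51:0 of the 104-bit product to x.
fn madd52lo(x: u64, y: u64, z: u64) -> u64 {
    let prod = ((y & MASK52) as u128) * ((z & MASK52) as u128);
    x.wrapping_add(prod as u64 & MASK52)
}

// High half: add bits 103:52 of the 104-bit product to x.
fn madd52hi(x: u64, y: u64, z: u64) -> u64 {
    let prod = ((y & MASK52) as u128) * ((z & MASK52) as u128);
    x.wrapping_add((prod >> 52) as u64 & MASK52)
}
```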
- - - - Convert the scalar BF16 (16-bit) floating-point element stored at the memory location "__A" to a single-precision (32-bit) floating-point element, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -b := Convert_BF16_To_FP32(MEM[__A+15:__A]) -FOR j := 0 to 7 - m := j*32 - dst[m+31:m] := b -ENDFOR -dst[MAX:256] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert the scalar half-precision (16-bit) floating-point element stored at the memory location "__A" to a single-precision (32-bit) floating-point element, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -b := Convert_FP16_To_FP32(MEM[__A+15:__A]) -FOR j := 0 to 7 - m := j*32 - dst[m+31:m] := b -ENDFOR -dst[MAX:256] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 7 - m := j*32 - dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+15:__A+m]) -ENDFOR -dst[MAX:256] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 7 - m := j*32 - dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+15:__A+m]) -ENDFOR -dst[MAX:256] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 7 - m := j*32 - dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+31:__A+m+16]) -ENDFOR -dst[MAX:256] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 7 - m := j*32 - dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+31:__A+m+16]) -ENDFOR -dst[MAX:256] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 7 - dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert the scalar BF16 (16-bit) floating-point element stored at the memory location "__A" to a single-precision (32-bit) floating-point element, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -b := Convert_BF16_To_FP32(MEM[__A+15:__A]) -FOR j := 0 to 3 - m := j*32 - dst[m+31:m] := b -ENDFOR -dst[MAX:128] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert the scalar half-precision (16-bit) floating-point element stored at the memory location "__A" to a single-precision (32-bit) floating-point element, broadcast it to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -b := Convert_FP16_To_FP32(MEM[__A+15:__A]) -FOR j := 0 to 3 - m := j*32 - dst[m+31:m] := b -ENDFOR -dst[MAX:128] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 3 - m := j*32 - dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+15:__A+m]) -ENDFOR -dst[MAX:128] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 3 - m := j*32 - dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+15:__A+m]) -ENDFOR -dst[MAX:128] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 3 - m := j*32 - dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+m+31:__A+m+16]) -ENDFOR -dst[MAX:128] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at location "__A" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 3 - m := j*32 - dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+m+31:__A+m+16]) -ENDFOR -dst[MAX:128] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 3 - dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 7 - dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
- - - Convert packed single-precision (32-bit) floating-point elements in "__A" to packed BF16 (16-bit) floating-point elements, and store the results in "dst". - - -FOR j := 0 to 3 - dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) -ENDFOR -dst[MAX:128] := 0 - - - AVX_NE_CONVERT -
immintrin.h
- Convert -
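The BF16 side of AVX-NE-CONVERT is simple in scalar terms: a BF16 value is the top 16 bits of an FP32 bit pattern, so widening is a shift and narrowing needs a rounding step. A sketch (names mine; the round-to-nearest-even choice reflects the "ne" in the feature name, and NaN handling is omitted for brevity):

```rust
fn bf16_to_f32(b: u16) -> f32 {
    // Exact: place the BF16 bits in the high half of an FP32 pattern.
    f32::from_bits((b as u32) << 16)
}

fn f32_to_bf16(x: f32) -> u16 {
    let bits = x.to_bits();
    // Round to nearest even on the 16 bits being dropped (NaNs not handled).
    let round = 0x7FFF + ((bits >> 16) & 1);
    (bits.wrapping_add(round) >> 16) as u16
}
```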
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:256] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:256] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:256] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:256] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:128] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:128] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:128] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:128] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:256] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:256] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:256] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 7 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:256] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:128] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j])) - tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1])) - tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2])) - tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3])) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:128] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := src.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:128] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
- - - - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst". - -FOR j := 0 to 3 - tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j]) - tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1]) - dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:128] := 0 - - - AVX_VNNI -
immintrin.h
- Arithmetic -
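All of the AVX-VNNI entries above share one per-dword-lane shape: widen, multiply, sum the four (or two) products into the accumulator, optionally saturate. A sketch of the u8 x i8 byte form (name mine):

```rust
fn dpbusd_lane(src: i32, a: [u8; 4], b: [i8; 4], saturate: bool) -> i32 {
    // An i64 accumulator is wide enough that the four products and the
    // 32-bit addend cannot overflow before the final narrowing.
    let mut sum = src as i64;
    for k in 0..4 {
        sum += a[k] as i64 * b[k] as i64;
    }
    if saturate {
        sum.clamp(i32::MIN as i64, i32::MAX as i64) as i32
    } else {
        sum as i32 // the plain form wraps modulo 2^32
    }
}
```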
- - - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) - tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:256] := 0 - - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) - tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) - dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:256] := 0 - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) - tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:256] := 0 - - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) - tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) - dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:256] := 0 - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate unsigned 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) - tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:256] := 0 - - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate unsigned 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with unsigned saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) - tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) - dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:256] := 0 - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) - tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:128] := 0 - - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of signed 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) - tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) - dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:128] := 0 - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) - tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:128] := 0 - - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding signed 16-bit integers in "__B", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) - tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) - dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:128] := 0 - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate unsigned 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) - tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 -ENDFOR -dst[MAX:128] := 0 - - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in "__A" with corresponding unsigned 16-bit integers in "__B", producing 2 intermediate unsigned 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "__W" with unsigned saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) - tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) - dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) -ENDFOR -dst[MAX:128] := 0 - - - - AVX_VNNI_INT16 -
immintrin.h
- Arithmetic -
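The VNNI_INT16 forms vary only in which operand is sign- versus zero-extended and in the final saturation. Sketches of two lanes (names mine): the signed x unsigned form with signed saturation and the unsigned x unsigned form with unsigned saturation, matching SIGNED_DWORD_SATURATE and UNSIGNED_DWORD_SATURATE above:

```rust
fn dpwsuds_lane(w: i32, a: [i16; 2], b: [u16; 2]) -> i32 {
    let sum = w as i64 + a[0] as i64 * b[0] as i64 + a[1] as i64 * b[1] as i64;
    sum.clamp(i32::MIN as i64, i32::MAX as i64) as i32 // signed saturation
}

fn dpwuuds_lane(w: u32, a: [u16; 2], b: [u16; 2]) -> u32 {
    let sum = w as u64 + a[0] as u64 * b[0] as u64 + a[1] as u64 * b[1] as u64;
    sum.min(u32::MAX as u64) as u32 // unsigned saturation
}
```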
- - - Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) - tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) - tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) - tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:256] := 0 - - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) - tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) - tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) - tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) - dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:256] := 0 - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) - tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) - tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) - tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:256] := 0 - - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) - tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) - tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) - tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) - dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:256] := 0 - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate unsigned 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) - tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) - tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) - tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:256] := 0 - - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate unsigned 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with unsigned saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 7 - tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) - tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) - tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) - tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) - dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:256] := 0 - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) - tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) - tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) - tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:128] := 0 - - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding signed 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) - tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) - tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) - tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) - dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:128] := 0 - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) - tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) - tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) - tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:128] := 0 - - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of signed 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with signed saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) - tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) - tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) - tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) - dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:128] := 0 - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate unsigned 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W", and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) - tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) - tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) - tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) - dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 -ENDFOR -dst[MAX:128] := 0 - - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
- - - Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "__A" with corresponding unsigned 8-bit integers in "__B", producing 4 intermediate unsigned 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "__W" with unsigned saturation, and store the packed 32-bit results in "dst". - - -FOR j := 0 to 3 - tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) - tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) - tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) - tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) - dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) -ENDFOR -dst[MAX:128] := 0 - - - - AVX_VNNI_INT8 -
immintrin.h
- Arithmetic -
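The VNNI_INT8 forms are the same pattern with four byte products per lane; for example, the signed x signed saturating lane (name mine):

```rust
fn dpbssds_lane(w: i32, a: [i8; 4], b: [i8; 4]) -> i32 {
    let mut sum = w as i64;
    for k in 0..4 {
        sum += a[k] as i64 * b[k] as i64;
    }
    sum.clamp(i32::MIN as i64, i32::MAX as i64) as i32
}
```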
- - - - - - - Extract contiguous bits from unsigned 32-bit integer "a", and store the result in "dst". Extract the number of bits specified by "len", starting at the bit specified by "start". - -tmp[511:0] := a -dst[31:0] := ZeroExtend32(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - - Extract contiguous bits from unsigned 32-bit integer "a", and store the result in "dst". Extract the number of bits specified by bits 15:8 of "control", starting at the bit specified by bits 7:0 of "control". -start := control[7:0] -len := control[15:8] -tmp[511:0] := a -dst[31:0] := ZeroExtend32(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - - - Extract contiguous bits from unsigned 64-bit integer "a", and store the result in "dst". Extract the number of bits specified by "len", starting at the bit specified by "start". - -tmp[511:0] := a -dst[63:0] := ZeroExtend64(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - - Extract contiguous bits from unsigned 64-bit integer "a", and store the result in "dst". Extract the number of bits specified by bits 15:8 of "control", starting at the bit specified by bits 7:0 of "control". -start := control[7:0] -len := control[15:8] -tmp[511:0] := a -dst[63:0] := ZeroExtend64(tmp[(start[7:0] + len[7:0] - 1):start[7:0]]) - - - BMI1 -
immintrin.h
- Bit Manipulation -
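A scalar sketch of the BEXTR field extract above (name mine). The zero-extension through a 512-bit temporary in the pseudocode simply means out-of-range fields read as zero:

```rust
fn bextr32(a: u32, start: u32, len: u32) -> u32 {
    let (start, len) = (start & 0xFF, len & 0xFF);
    if len == 0 || start >= 32 {
        return 0; // empty or entirely out-of-range field
    }
    let field = (a as u64) >> start;
    if len >= 32 {
        field as u32
    } else {
        (field & ((1u64 << len) - 1)) as u32
    }
}
```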
- - - - Extract the lowest set bit from unsigned 32-bit integer "a" and set the corresponding bit in "dst". All other bits in "dst" are zeroed, and all bits are zeroed if no bits are set in "a". - -dst := (-a) AND a - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - Extract the lowest set bit from unsigned 64-bit integer "a" and set the corresponding bit in "dst". All other bits in "dst" are zeroed, and all bits are zeroed if no bits are set in "a". - -dst := (-a) AND a - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - Set all the lower bits of "dst" up to and including the lowest set bit in unsigned 32-bit integer "a". - -dst := (a - 1) XOR a - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - Set all the lower bits of "dst" up to and including the lowest set bit in unsigned 64-bit integer "a". - -dst := (a - 1) XOR a - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - Copy all bits from unsigned 32-bit integer "a" to "dst", and reset (set to 0) the bit in "dst" that corresponds to the lowest set bit in "a". - -dst := (a - 1) AND a - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - Copy all bits from unsigned 64-bit integer "a" to "dst", and reset (set to 0) the bit in "dst" that corresponds to the lowest set bit in "a". - -dst := (a - 1) AND a - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - - Compute the bitwise NOT of 32-bit integer "a" and then AND with "b", and store the results in "dst". -dst[31:0] := ((NOT a[31:0]) AND b[31:0]) - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - - Compute the bitwise NOT of 64-bit integer "a" and then AND with "b", and store the results in "dst". -dst[63:0] := ((NOT a[63:0]) AND b[63:0]) - - - BMI1 -
immintrin.h
- Bit Manipulation -
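The BMI1 operations above (ANDN, BLSI, BLSMSK, BLSR) are direct transcriptions of the identities in their pseudocode; shown for u32, the u64 forms are identical:

```rust
fn andn(a: u32, b: u32) -> u32 { !a & b }          // (NOT a) AND b
fn blsi(a: u32) -> u32 { a.wrapping_neg() & a }    // isolate lowest set bit
fn blsmsk(a: u32) -> u32 { a.wrapping_sub(1) ^ a } // mask up to and including it
fn blsr(a: u32) -> u32 { a.wrapping_sub(1) & a }   // clear it

// For example, blsi(0b1011_0100) == 0b0000_0100
// and blsr(0b1011_0100) == 0b1011_0000.
```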
- - - - Count the number of trailing zero bits in unsigned 16-bit integer "a", and return that count in "dst". - -tmp := 0 -dst := 0 -DO WHILE ((tmp < 16) AND a[tmp] == 0) - tmp := tmp + 1 - dst := dst + 1 -OD - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - Count the number of trailing zero bits in unsigned 32-bit integer "a", and return that count in "dst". - -tmp := 0 -dst := 0 -DO WHILE ((tmp < 32) AND a[tmp] == 0) - tmp := tmp + 1 - dst := dst + 1 -OD - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - Count the number of trailing zero bits in unsigned 64-bit integer "a", and return that count in "dst". - -tmp := 0 -dst := 0 -DO WHILE ((tmp < 64) AND a[tmp] == 0) - tmp := tmp + 1 - dst := dst + 1 -OD - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - Count the number of trailing zero bits in unsigned 32-bit integer "a", and return that count in "dst". - -tmp := 0 -dst := 0 -DO WHILE ((tmp < 32) AND a[tmp] == 0) - tmp := tmp + 1 - dst := dst + 1 -OD - - - BMI1 -
immintrin.h
- Bit Manipulation -
- - - - Count the number of trailing zero bits in unsigned 64-bit integer "a", and return that count in "dst". - -tmp := 0 -dst := 0 -DO WHILE ((tmp < 64) AND a[tmp] == 0) - tmp := tmp + 1 - dst := dst + 1 -OD - - - BMI1 -
immintrin.h
- Bit Manipulation -
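A sketch of the TZCNT loop above; unlike the older BSF instruction, a zero input is well defined and yields the operand width:

```rust
fn tzcnt32(a: u32) -> u32 {
    let mut n = 0;
    while n < 32 && (a >> n) & 1 == 0 {
        n += 1;
    }
    n // equivalent to a.trailing_zeros() in Rust
}
```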
- - - - - - - Copy all bits from unsigned 32-bit integer "a" to "dst", and reset (set to 0) the high bits in "dst" starting at "index". - -n := index[7:0] -dst := a -IF (n < 32) - dst[31:n] := 0 -FI - - - BMI2 -
immintrin.h
- Bit Manipulation -
- - - - - Copy all bits from unsigned 64-bit integer "a" to "dst", and reset (set to 0) the high bits in "dst" starting at "index". - -n := index[7:0] -dst := a -IF (n < 64) - dst[63:n] := 0 -FI - - - BMI2 -
immintrin.h
- Bit Manipulation -
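BZHI keeps the bits below `index` and zeroes the rest; a sketch (name mine):

```rust
fn bzhi32(a: u32, index: u32) -> u32 {
    let n = index & 0xFF; // only the low 8 bits of the index are used
    if n < 32 { a & ((1u32 << n) - 1) } else { a }
}
```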
- - - - - Deposit contiguous low bits from unsigned 32-bit integer "a" to "dst" at the corresponding bit locations specified by "mask"; all other bits in "dst" are set to zero. - -tmp := a -dst := 0 -m := 0 -k := 0 -DO WHILE m < 32 - IF mask[m] == 1 - dst[m] := tmp[k] - k := k + 1 - FI - m := m + 1 -OD - - - BMI2 -
immintrin.h
- Bit Manipulation -
- - - - - Deposit contiguous low bits from unsigned 64-bit integer "a" to "dst" at the corresponding bit locations specified by "mask"; all other bits in "dst" are set to zero. - -tmp := a -dst := 0 -m := 0 -k := 0 -DO WHILE m < 64 - IF mask[m] == 1 - dst[m] := tmp[k] - k := k + 1 - FI - m := m + 1 -OD - - - BMI2 -
immintrin.h
- Bit Manipulation -
- - - - - Extract bits from unsigned 32-bit integer "a" at the corresponding bit locations specified by "mask" to contiguous low bits in "dst"; the remaining upper bits in "dst" are set to zero. - -tmp := a -dst := 0 -m := 0 -k := 0 -DO WHILE m < 32 - IF mask[m] == 1 - dst[k] := tmp[m] - k := k + 1 - FI - m := m + 1 -OD - - - BMI2 -
immintrin.h
- Bit Manipulation -
- - - - - Extract bits from unsigned 64-bit integer "a" at the corresponding bit locations specified by "mask" to contiguous low bits in "dst"; the remaining upper bits in "dst" are set to zero. - -tmp := a -dst := 0 -m := 0 -k := 0 -DO WHILE m < 64 - IF mask[m] == 1 - dst[k] := tmp[m] - k := k + 1 - FI - m := m + 1 -OD - - - BMI2 -
immintrin.h
- Bit Manipulation -
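PDEP scatters the low bits of "a" to the set positions of "mask", and PEXT gathers the masked bits of "a" into the low bits; scalar sketches of both loops (names mine):

```rust
fn pdep32(a: u32, mask: u32) -> u32 {
    let (mut dst, mut k) = (0u32, 0);
    for m in 0..32 {
        if mask >> m & 1 == 1 {
            dst |= (a >> k & 1) << m; // deposit the next low bit of a at bit m
            k += 1;
        }
    }
    dst
}

fn pext32(a: u32, mask: u32) -> u32 {
    let (mut dst, mut k) = (0u32, 0);
    for m in 0..32 {
        if mask >> m & 1 == 1 {
            dst |= (a >> m & 1) << k; // gather bit m of a into the next low bit
            k += 1;
        }
    }
    dst
}
```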
- - - - - - Multiply unsigned 32-bit integers "a" and "b", store the low 32-bits of the result in "dst", and store the high 32-bits in "hi". This does not read or write arithmetic flags. - -dst[31:0] := (a * b)[31:0] -MEM[hi+31:hi] := (a * b)[63:32] - - - BMI2 -
immintrin.h
- Arithmetic -
- - - - - - Multiply unsigned 64-bit integers "a" and "b", store the low 64-bits of the result in "dst", and store the high 64-bits in "hi". This does not read or write arithmetic flags. - -dst[63:0] := (a * b)[63:0] -MEM[hi+63:hi] := (a * b)[127:64] - - - BMI2 -
immintrin.h
- Arithmetic -
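MULX is a flag-preserving widening multiply; per the pseudocode it is just the double-width product split into halves (name mine):

```rust
fn mulx32(a: u32, b: u32) -> (u32, u32) {
    let wide = a as u64 * b as u64;
    (wide as u32, (wide >> 32) as u32) // (low half "dst", high half "hi")
}
```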
- - - - - - Increment the shadow stack pointer by 4 times the value specified in bits [7:0] of "a". - -SSP := SSP + a[7:0] * 4 - - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - Increment the shadow stack pointer by 8 times the value specified in bits [7:0] of "a". - -SSP := SSP + a[7:0] * 8 - - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - Read the low 32-bits of the current shadow stack pointer, and store the result in "dst". - dst := SSP[31:0] - - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - Read the current shadow stack pointer, and store the result in "dst". - dst := SSP[63:0] - - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - Save the previous shadow stack pointer context. - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - Restore the saved shadow stack pointer from the shadow stack restore token previously created on the shadow stack by saveprevssp. - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - - Write 32-bit value in "val" to a shadow stack page in memory specified by "p". - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - - Write 64-bit value in "val" to a shadow stack page in memory specified by "p". - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - - Write 32-bit value in "val" to a user shadow stack page in memory specified by "p". - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - - Write 64-bit value in "val" to a user shadow stack page in memory specified by "p". - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - Mark shadow stack pointed to by IA32_PL0_SSP as busy. - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - Mark shadow stack pointed to by "p" as not busy. - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - If CET is enabled, read the low 32-bits of the current shadow stack pointer, and store the result in "dst". Otherwise return 0. - dst := SSP[31:0] - - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - If CET is enabled, read the current shadow stack pointer, and store the result in "dst". Otherwise return 0. - dst := SSP[63:0] - - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - Increment the shadow stack pointer by 4 times the value specified in bits [7:0] of "a". - -SSP := SSP + a[7:0] * 4 - - - CET_SS -
immintrin.h
- Miscellaneous -
- - - - - Hint to hardware that the cache line that contains "p" should be demoted from the cache closest to the processor core to a level more distant from the processor core. - - CLDEMOTE -
immintrin.h
- Miscellaneous -
- - - - - - Invalidate and flush the cache line that contains "p" from all levels of the cache hierarchy. - - CLFLUSHOPT -
immintrin.h
- General Support -
- - - - - - Write back to memory the cache line that contains "p" from any level of the cache hierarchy in the cache coherence domain. - - CLWB -
immintrin.h
- General Support -
- - - - - - - - - Compare the value in memory at "__A" with the value of "__B". If the condition specified by "__D" is met, add the third operand "__C" to the value at "__A" and write the sum back to "__A"; otherwise the value at "__A" is unchanged. The return value is the original value at "__A". - CASE (__D[3:0]) OF -0: OP := _CMPCCX_O -1: OP := _CMPCCX_NO -2: OP := _CMPCCX_B -3: OP := _CMPCCX_NB -4: OP := _CMPCCX_Z -5: OP := _CMPCCX_NZ -6: OP := _CMPCCX_BE -7: OP := _CMPCCX_NBE -8: OP := _CMPCCX_S -9: OP := _CMPCCX_NS -10: OP := _CMPCCX_P -11: OP := _CMPCCX_NP -12: OP := _CMPCCX_L -13: OP := _CMPCCX_NL -14: OP := _CMPCCX_LE -15: OP := _CMPCCX_NLE -ESAC -tmp1 := LOAD_LOCK(__A) -tmp2 := tmp1 + __C -IF (tmp1[31:0] OP __B[31:0]) - STORE_UNLOCK(__A, tmp2) -ELSE - STORE_UNLOCK(__A, tmp1) -FI -dst[31:0] := tmp1[31:0] - - - - - - - - - - - - - - - - - - CMPCCXADD -
immintrin.h
- Arithmetic -
- - - - - - - Compare the value in memory at "__A" with the value of "__B". If the condition specified by "__D" is met, add the third operand "__C" to the value at "__A" and write the sum back to "__A"; otherwise the value at "__A" is unchanged. The return value is the original value at "__A". - CASE (__D[3:0]) OF -0: OP := _CMPCCX_O -1: OP := _CMPCCX_NO -2: OP := _CMPCCX_B -3: OP := _CMPCCX_NB -4: OP := _CMPCCX_Z -5: OP := _CMPCCX_NZ -6: OP := _CMPCCX_BE -7: OP := _CMPCCX_NBE -8: OP := _CMPCCX_S -9: OP := _CMPCCX_NS -10: OP := _CMPCCX_P -11: OP := _CMPCCX_NP -12: OP := _CMPCCX_L -13: OP := _CMPCCX_NL -14: OP := _CMPCCX_LE -15: OP := _CMPCCX_NLE -ESAC -tmp1 := LOAD_LOCK(__A) -tmp2 := tmp1 + __C -IF (tmp1[63:0] OP __B[63:0]) - STORE_UNLOCK(__A, tmp2) -ELSE - STORE_UNLOCK(__A, tmp1) -FI -dst[63:0] := tmp1[63:0] - - - - - - - - - - - - - - - - - - CMPCCXADD -
immintrin.h
- Arithmetic -
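Ignoring the LOAD_LOCK/STORE_UNLOCK atomicity, each CMPCCXADD variant is a compare-and-conditional-add that always returns the original memory value. A sketch of one condition code, the signed-less-than (_CMPCCX_L) case (name mine):

```rust
fn cmp_l_xadd(target: &mut i32, b: i32, c: i32) -> i32 {
    let old = *target;
    if old < b {
        *target = old.wrapping_add(c); // condition met: store old + c
    }
    old // the original value is returned either way
}
```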
- - - - - Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 8-bit integer "v", and stores the result in "dst". - tmp1[7:0] := v[0:7] // bit reflection -tmp2[31:0] := crc[0:31] // bit reflection -tmp3[39:0] := tmp1[7:0] << 32 -tmp4[39:0] := tmp2[31:0] << 8 -tmp5[39:0] := tmp3[39:0] XOR tmp4[39:0] -tmp6[31:0] := MOD2(tmp5[39:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 -dst[31:0] := tmp6[0:31] // bit reflection - - - CRC32 -
nmmintrin.h
- Cryptography -
- - - - - Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 16-bit integer "v", and stores the result in "dst". - tmp1[15:0] := v[0:15] // bit reflection -tmp2[31:0] := crc[0:31] // bit reflection -tmp3[47:0] := tmp1[15:0] << 32 -tmp4[47:0] := tmp2[31:0] << 16 -tmp5[47:0] := tmp3[47:0] XOR tmp4[47:0] -tmp6[31:0] := MOD2(tmp5[47:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 -dst[31:0] := tmp6[0:31] // bit reflection - - - CRC32 -
nmmintrin.h
- Cryptography -
- - - - - Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 32-bit integer "v", and stores the result in "dst". - tmp1[31:0] := v[0:31] // bit reflection -tmp2[31:0] := crc[0:31] // bit reflection -tmp3[63:0] := tmp1[31:0] << 32 -tmp4[63:0] := tmp2[31:0] << 32 -tmp5[63:0] := tmp3[63:0] XOR tmp4[63:0] -tmp6[31:0] := MOD2(tmp5[63:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 -dst[31:0] := tmp6[0:31] // bit reflection - - - CRC32 -
nmmintrin.h
- Cryptography -
- - - - - Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 64-bit integer "v", and stores the result in "dst". - tmp1[63:0] := v[0:63] // bit reflection -tmp2[31:0] := crc[0:31] // bit reflection -tmp3[95:0] := tmp1[63:0] << 32 -tmp4[95:0] := tmp2[31:0] << 64 -tmp5[95:0] := tmp3[95:0] XOR tmp4[95:0] -tmp6[31:0] := MOD2(tmp5[95:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 -dst[31:0] := tmp6[0:31] // bit reflection - - - CRC32 -
nmmintrin.h
- Cryptography -
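The bit reflections and MOD2 polynomial division above collapse to the familiar reflected CRC update: these instructions compute CRC32-C (polynomial 0x11EDC6F41, reversed form 0x82F63B78) with no final inversion. A bitwise per-byte sketch (name mine; the wider forms just process 2, 4, or 8 bytes):

```rust
fn crc32c_u8(crc: u32, v: u8) -> u32 {
    let mut crc = crc ^ v as u32;
    for _ in 0..8 {
        // Reflected update: shift right, xor the reversed polynomial on carry-out.
        crc = if crc & 1 != 0 { (crc >> 1) ^ 0x82F6_3B78 } else { crc >> 1 };
    }
    crc
}
```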
- - - - - - - Reads the 64-byte command pointed to by "__src", formats 64-byte enqueue store data, and performs a 64-byte enqueue store to the memory pointed to by "__dst". This intrinsic may only be used in User mode. - - ENQCMD -
immintrin.h
- Unknown -
- - - - - Reads the 64-byte command pointed to by "__src", formats 64-byte enqueue store data, and performs a 64-byte enqueue store to the memory pointed to by "__dst". This intrinsic may only be used in Privileged mode. - - ENQCMD -
immintrin.h
- Unknown -
- - - - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - m := j*16 - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) -ENDFOR -dst[MAX:256] := 0 - - - F16C -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - [round_imm_note] - -FOR j := 0 to 7 - i := 16*j - l := 32*j - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) -ENDFOR -dst[MAX:128] := 0 - - - F16C -
immintrin.h
- Convert -
- - - - Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - m := j*16 - dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m]) -ENDFOR -dst[MAX:128] := 0 - - - F16C -
immintrin.h
- Convert -
- - - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". - [round_imm_note] - -FOR j := 0 to 3 - i := 16*j - l := 32*j - dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l]) -ENDFOR -dst[MAX:64] := 0 - - - F16C -
immintrin.h
- Convert -
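Convert_FP16_To_FP32 above hides the usual IEEE plumbing; one way to write it out in scalar code (name mine; the reverse direction additionally needs the rounding mode from the immediate):

```rust
fn f16_to_f32(h: u16) -> f32 {
    let sign = ((h as u32) >> 15) << 31;
    let exp = (h as u32 >> 10) & 0x1F;
    let frac = h as u32 & 0x3FF;
    let bits = match exp {
        0 if frac == 0 => sign, // signed zero
        0 => {
            // Subnormal: renormalize so the leading 1 lands at bit 10.
            let shift = frac.leading_zeros() - 21;
            sign | ((113 - shift) << 23) | (((frac << shift) & 0x3FF) << 13)
        }
        0x1F => sign | 0x7F80_0000 | (frac << 13), // infinity / NaN
        _ => sign | ((exp + 112) << 23) | (frac << 13), // bias change: 127 - 15
    };
    f32::from_bits(bits)
}
```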
- - - - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (a[63:0] * b[63:0]) + c[63:0] -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := (a[31:0] * b[31:0]) + c[31:0] -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
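One detail the descriptions above leave implicit: FMA computes a*b + c with a single rounding. The scalar equivalent of one lane in Rust is `mul_add`:

```rust
fn fmadd_lane(a: f64, b: f64, c: f64) -> f64 {
    a.mul_add(b, c) // fused: one rounding, unlike the two in a * b + c
}
```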
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
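Note the parity test in the fmaddsub pseudocode: even-indexed lanes subtract "c" and odd-indexed lanes add it. A sketch over a slice of lanes (name mine):

```rust
fn fmaddsub(a: &[f64], b: &[f64], c: &[f64], dst: &mut [f64]) {
    for j in 0..dst.len() {
        dst[j] = if j & 1 == 0 {
            a[j].mul_add(b[j], -c[j]) // even lane: a*b - c
        } else {
            a[j].mul_add(b[j], c[j]) // odd lane: a*b + c
        };
    }
}
```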
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (a[63:0] * b[63:0]) - c[63:0] -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
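The scalar double-precision entry above matches the intrinsic immintrin.h exposes as _mm_fmsub_sd; only the low lane is computed and the high lane of "a" passes through. A minimal sketch under that assumption:

/* Assumed intrinsic: _mm_fmsub_sd (FMA); compile with -mfma. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128d a = _mm_set_pd(9.0, 2.0); /* high = 9.0, low = 2.0 */
    __m128d b = _mm_set_pd(0.0, 3.0);
    __m128d c = _mm_set_pd(0.0, 1.0);
    __m128d r = _mm_fmsub_sd(a, b, c);
    double out[2];
    _mm_storeu_pd(out, r);
    printf("%g %g\n", out[0], out[1]); /* 5 (2*3-1) and 9 (copied from "a") */
    return 0;
}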
- - - - - - Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := (a[31:0] * b[31:0]) - c[31:0] -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - IF ((j & 1) == 0) - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i] - ELSE - dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - FI -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - IF ((j & 1) == 0) - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i] - ELSE - dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i] - FI -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
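The 256-bit single-precision form above matches what immintrin.h exposes as _mm256_fmsubadd_ps: even lanes add "c", odd lanes subtract it. A minimal sketch under that assumption:

/* Assumed intrinsic: _mm256_fmsubadd_ps (FMA); compile with -mfma. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256 a = _mm256_set1_ps(2.0f);
    __m256 b = _mm256_set1_ps(3.0f);
    __m256 c = _mm256_set1_ps(1.0f);
    __m256 r = _mm256_fmsubadd_ps(a, b, c);
    float out[8];
    _mm256_storeu_ps(out, r);
    printf("%g %g\n", out[0], out[1]); /* 7 (2*3+1) and 5 (2*3-1) */
    return 0;
}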
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0] -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0] -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
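The scalar single-precision entry above matches the intrinsic immintrin.h exposes as _mm_fnmadd_ss: low lane = -(a*b) + c, upper three lanes copied from "a". A minimal sketch under that assumption:

/* Assumed intrinsic: _mm_fnmadd_ss (FMA); compile with -mfma. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set_ps(40.0f, 30.0f, 20.0f, 2.0f);
    __m128 b = _mm_set1_ps(3.0f);
    __m128 c = _mm_set1_ps(1.0f);
    __m128 r = _mm_fnmadd_ss(a, b, c);
    float out[4];
    _mm_storeu_ps(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* -5 20 30 40 */
    return 0;
}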
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - -FOR j := 0 to 3 - i := j*64 - dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i] -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] -ENDFOR -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". - -FOR j := 0 to 7 - i := j*32 - dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i] -ENDFOR -dst[MAX:256] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0] -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - - Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0] -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - - - FMA -
immintrin.h
- Arithmetic -
- - - - - Read the FS segment base register and store the 32-bit result in "dst". - dst[31:0] := FS_Segment_Base_Register -dst[63:32] := 0 - - - FSGSBASE -
immintrin.h
- General Support -
- - - Read the FS segment base register and store the 64-bit result in "dst". - dst[63:0] := FS_Segment_Base_Register - - - FSGSBASE -
immintrin.h
- General Support -
- - - Read the GS segment base register and store the 32-bit result in "dst". - dst[31:0] := GS_Segment_Base_Register -dst[63:32] := 0 - - - FSGSBASE -
immintrin.h
- General Support -
- - - Read the GS segment base register and store the 64-bit result in "dst". - dst[63:0] := GS_Segment_Base_Register - - - FSGSBASE -
immintrin.h
- General Support -
- - - - Write the unsigned 32-bit integer "a" to the FS segment base register. - -FS_Segment_Base_Register[31:0] := a[31:0] -FS_Segment_Base_Register[63:32] := 0 - - - FSGSBASE -
immintrin.h
- General Support -
- - - - Write the unsigned 64-bit integer "a" to the FS segment base register. - -FS_Segment_Base_Register[63:0] := a[63:0] - - - FSGSBASE -
immintrin.h
- General Support -
- - - - Write the unsigned 32-bit integer "a" to the GS segment base register. - -GS_Segment_Base_Register[31:0] := a[31:0] -GS_Segment_Base_Register[63:32] := 0 - - - FSGSBASE -
immintrin.h
- General Support -
- - - - Write the unsigned 64-bit integer "a" to the GS segment base register. - -GS_Segment_Base_Register[63:0] := a[63:0] - - - FSGSBASE -
immintrin.h
- General Support -
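These four FSGSBASE entries correspond to the _readfsbase/_readgsbase and _writefsbase/_writegsbase intrinsics in immintrin.h. A minimal sketch, assuming those names; note RDFSBASE/WRFSBASE only execute in user mode once the OS sets CR4.FSGSBASE (recent Linux kernels do):

/* Assumed intrinsics: _readfsbase_u64 / _writefsbase_u64; compile with -mfsgsbase. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned long long base = _readfsbase_u64(); /* current FS segment base */
    printf("FS base: %#llx\n", base);
    _writefsbase_u64(base);                      /* write the same value back */
    return 0;
}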
- - - - - - Reload the x87 FPU, MMX technology, XMM, and MXCSR registers from the 512-byte memory image at "mem_addr". This data should have been written to memory previously using the FXSAVE instruction, and in the same format as required by the operating mode. "mem_addr" must be aligned on a 16-byte boundary. - state_x87_fpu_mmx_sse := fxrstor(MEM[mem_addr+512*8-1:mem_addr]) - - - FXSR -
immintrin.h
- OS-Targeted -
- - - - Reload the x87 FPU, MMX technology, XMM, and MXCSR registers from the 512-byte memory image at "mem_addr". This data should have been written to memory previously using the FXSAVE64 instruction, and in the same format as required by the operating mode. "mem_addr" must be aligned on a 16-byte boundary. - state_x87_fpu_mmx_sse := fxrstor64(MEM[mem_addr+512*8-1:mem_addr]) - - - FXSR -
immintrin.h
- OS-Targeted -
- - - - Save the current state of the x87 FPU, MMX technology, XMM, and MXCSR registers to a 512-byte memory location at "mem_addr". The layout of the 512-byte region depends on the operating mode. Bytes [511:464] are available for software use and will not be overwritten by the processor. - MEM[mem_addr+512*8-1:mem_addr] := fxsave(state_x87_fpu_mmx_sse) - - - FXSR -
immintrin.h
- OS-Targeted -
- - - - Save the current state of the x87 FPU, MMX technology, XMM, and MXCSR registers to a 512-byte memory location at "mem_addr". The layout of the 512-byte region depends on the operating mode. Bytes [511:464] are available for software use and will not be overwritten by the processor. - MEM[mem_addr+512*8-1:mem_addr] := fxsave64(state_x87_fpu_mmx_sse) - - - FXSR -
immintrin.h
- OS-Targeted -
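These FXSR entries correspond to the _fxsave/_fxrstor family in immintrin.h. A minimal save/restore round trip, assuming those names; the area is 512 bytes and must be 16-byte aligned:

/* Assumed intrinsics: _fxsave / _fxrstor; compile with -mfxsr. */
#include <immintrin.h>
#include <stdalign.h>

int main(void) {
    alignas(16) unsigned char area[512];
    _fxsave(area);  /* snapshot x87/MMX/XMM/MXCSR state */
    _fxrstor(area); /* restore the snapshot just taken */
    return 0;
}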
- - - - - - - - Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. - -DEFINE gf2p8mul_byte(src1byte, src2byte) { - tword := 0 - FOR i := 0 to 7 - IF src2byte.bit[i] - tword := tword XOR (src1byte << i) - FI - ENDFOR - FOR i := 14 downto 8 - p := 0x11B << (i-8) - IF tword.bit[i] - tword := tword XOR p - FI - ENDFOR - RETURN tword.byte[0] -} -FOR j := 0 TO 63 - IF k[j] - dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) - ELSE - dst.byte[j] := 0 - FI -ENDFOR -dst[MAX:512] := 0 - - - GFNI - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. - -DEFINE gf2p8mul_byte(src1byte, src2byte) { - tword := 0 - FOR i := 0 to 7 - IF src2byte.bit[i] - tword := tword XOR (src1byte << i) - FI - ENDFOR - FOR i := 14 downto 8 - p := 0x11B << (i-8) - IF tword.bit[i] - tword := tword XOR p - FI - ENDFOR - RETURN tword.byte[0] -} -FOR j := 0 TO 63 - IF k[j] - dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) - ELSE - dst.byte[j] := src.byte[j] - FI -ENDFOR -dst[MAX:512] := 0 - - - GFNI - AVX512F -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. - -DEFINE gf2p8mul_byte(src1byte, src2byte) { - tword := 0 - FOR i := 0 to 7 - IF src2byte.bit[i] - tword := tword XOR (src1byte << i) - FI - ENDFOR - FOR i := 14 downto 8 - p := 0x11B << (i-8) - IF tword.bit[i] - tword := tword XOR p - FI - ENDFOR - RETURN tword.byte[0] -} -FOR j := 0 TO 63 - dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) -ENDFOR -dst[MAX:512] := 0 - - - GFNI - AVX512F -
immintrin.h
- Arithmetic -
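The unmasked 512-bit form above matches the intrinsic immintrin.h exposes as _mm512_gf2p8mul_epi8. A minimal sketch under that assumption, reproducing the FIPS-197 worked example {57} x {83} = {c1}:

/* Assumed intrinsic: _mm512_gf2p8mul_epi8; compile with -mgfni -mavx512f. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi8(0x57);
    __m512i b = _mm512_set1_epi8((char)0x83);
    __m512i r = _mm512_gf2p8mul_epi8(a, b); /* byte-wise GF(2^8) product */
    unsigned char out[64];
    _mm512_storeu_si512(out, r);
    printf("%02x\n", out[0]); /* c1 */
    return 0;
}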
- - - - - - - Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 7 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := 0 - FI - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - GFNI - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 7 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := src.qword[j].byte[i] - FI - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - GFNI - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst". - -DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 7 - FOR i := 0 to 7 - dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - GFNI - AVX512F -
immintrin.h
- Arithmetic -
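The unmasked form above matches _mm512_gf2p8affine_epi64_epi8 in immintrin.h. A minimal sketch under that assumption; the matrix constant 0x0102040810204080 is, under this byte ordering, the identity matrix, so with b = 0 the call is a byte-wise no-op (other matrices implement arbitrary per-byte bit permutations):

/* Assumed intrinsic: _mm512_gf2p8affine_epi64_epi8 (GFNI + AVX512F). */
#include <immintrin.h>

__m512i affine_identity(__m512i x) {
    const __m512i identity = _mm512_set1_epi64(0x0102040810204080LL);
    return _mm512_gf2p8affine_epi64_epi8(x, identity, 0); /* returns x unchanged */
}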
- - - - - - - Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 7 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := 0 - FI - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - GFNI - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - - - Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 7 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := src.qword[j].byte[i] - FI - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - GFNI - AVX512F -
immintrin.h
- Arithmetic -
- - - - - - Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst". - DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 7 - FOR i := 0 to 7 - dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) - ENDFOR -ENDFOR -dst[MAX:512] := 0 - - - GFNI - AVX512F -
immintrin.h
- Arithmetic -
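The unmasked form above matches _mm512_gf2p8affineinv_epi64_epi8 in immintrin.h. A minimal sketch under that assumption: with the identity matrix (assumed constant 0x0102040810204080, as in the previous example) and b = 0, the result is the plain GF(2^8) multiplicative inverse of every byte, where 0 maps to 0:

/* Assumed intrinsic: _mm512_gf2p8affineinv_epi64_epi8 (GFNI + AVX512F). */
#include <immintrin.h>

__m512i gf_inverse_bytes(__m512i x) {
    const __m512i identity = _mm512_set1_epi64(0x0102040810204080LL);
    return _mm512_gf2p8affineinv_epi64_epi8(x, identity, 0);
}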
- - - - - - - - Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. - -DEFINE gf2p8mul_byte(src1byte, src2byte) { - tword := 0 - FOR i := 0 to 7 - IF src2byte.bit[i] - tword := tword XOR (src1byte << i) - FI - ENDFOR - FOR i := 14 downto 8 - p := 0x11B << (i-8) - IF tword.bit[i] - tword := tword XOR p - FI - ENDFOR - RETURN tword.byte[0] -} -FOR j := 0 TO 31 - IF k[j] - dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) - ELSE - dst.byte[j] := 0 - FI -ENDFOR -dst[MAX:256] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. - -DEFINE gf2p8mul_byte(src1byte, src2byte) { - tword := 0 - FOR i := 0 to 7 - IF src2byte.bit[i] - tword := tword XOR (src1byte << i) - FI - ENDFOR - FOR i := 14 downto 8 - p := 0x11B << (i-8) - IF tword.bit[i] - tword := tword XOR p - FI - ENDFOR - RETURN tword.byte[0] -} -FOR j := 0 TO 31 - IF k[j] - dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) - ELSE - dst.byte[j] := src.byte[j] - FI -ENDFOR -dst[MAX:256] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. - -DEFINE gf2p8mul_byte(src1byte, src2byte) { - tword := 0 - FOR i := 0 to 7 - IF src2byte.bit[i] - tword := tword XOR (src1byte << i) - FI - ENDFOR - FOR i := 14 downto 8 - p := 0x11B << (i-8) - IF tword.bit[i] - tword := tword XOR p - FI - ENDFOR - RETURN tword.byte[0] -} -FOR j := 0 TO 31 - dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) -ENDFOR -dst[MAX:256] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. - -DEFINE gf2p8mul_byte(src1byte, src2byte) { - tword := 0 - FOR i := 0 to 7 - IF src2byte.bit[i] - tword := tword XOR (src1byte << i) - FI - ENDFOR - FOR i := 14 downto 8 - p := 0x11B << (i-8) - IF tword.bit[i] - tword := tword XOR p - FI - ENDFOR - RETURN tword.byte[0] -} -FOR j := 0 TO 15 - IF k[j] - dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) - ELSE - dst.byte[j] := 0 - FI -ENDFOR -dst[MAX:128] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. - -DEFINE gf2p8mul_byte(src1byte, src2byte) { - tword := 0 - FOR i := 0 to 7 - IF src2byte.bit[i] - tword := tword XOR (src1byte << i) - FI - ENDFOR - FOR i := 14 downto 8 - p := 0x11B << (i-8) - IF tword.bit[i] - tword := tword XOR p - FI - ENDFOR - RETURN tword.byte[0] -} -FOR j := 0 TO 15 - IF k[j] - dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) - ELSE - dst.byte[j] := src.byte[j] - FI -ENDFOR -dst[MAX:128] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. - -DEFINE gf2p8mul_byte(src1byte, src2byte) { - tword := 0 - FOR i := 0 to 7 - IF src2byte.bit[i] - tword := tword XOR (src1byte << i) - FI - ENDFOR - FOR i := 14 downto 8 - p := 0x11B << (i-8) - IF tword.bit[i] - tword := tword XOR p - FI - ENDFOR - RETURN tword.byte[0] -} -FOR j := 0 TO 15 - dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j]) -ENDFOR -dst[MAX:128] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 3 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := 0 - FI - ENDFOR -ENDFOR -dst[MAX:256] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - - Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 3 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := src.qword[j].byte[i] - FI - ENDFOR -ENDFOR -dst[MAX:256] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst". - -DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 3 - FOR i := 0 to 7 - dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) - ENDFOR -ENDFOR -dst[MAX:256] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - -DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 1 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := 0 - FI - ENDFOR -ENDFOR -dst[MAX:128] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - - Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - -DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 1 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := src.qword[j].byte[i] - FI - ENDFOR -ENDFOR -dst[MAX:128] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst". - -DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 1 - FOR i := 0 to 7 - dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b) - ENDFOR -ENDFOR -dst[MAX:128] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 3 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := 0 - FI - ENDFOR -ENDFOR -dst[MAX:256] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - - Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 3 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := src.qword[j].byte[i] - FI - ENDFOR -ENDFOR -dst[MAX:256] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst". - DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 3 - FOR i := 0 to 7 - dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) - ENDFOR -ENDFOR -dst[MAX:256] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). - DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 1 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := 0 - FI - ENDFOR -ENDFOR -dst[MAX:128] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - - - Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). - DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 1 - FOR i := 0 to 7 - IF k[j*8+i] - dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) - ELSE - dst.qword[j].byte[i] := src.qword[j].byte[i] - FI - ENDFOR -ENDFOR -dst[MAX:128] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst". - DEFINE parity(x) { - t := 0 - FOR i := 0 to 7 - t := t XOR x.bit[i] - ENDFOR - RETURN t -} -DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) { - FOR i := 0 to 7 - retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i] - ENDFOR - RETURN retbyte -} -FOR j := 0 TO 1 - FOR i := 0 to 7 - dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b) - ENDFOR -ENDFOR -dst[MAX:128] := 0 - - - GFNI - AVX512VL -
immintrin.h
- Arithmetic -
- - - - - - Provides a hint to the processor to selectively reset, as specified by the signed 32-bit integer "__eax", the prediction history of the current logical processor. - - HRESET -
immintrin.h
- General Support -
- - - - - - Invalidate mappings in the Translation Lookaside Buffers (TLBs) and paging-structure caches for the processor context identifier (PCID) specified by "descriptor" based on the invalidation type specified in "type". - The PCID "descriptor" is specified as a 16-byte memory operand (with no alignment restrictions) where bits [11:0] specify the PCID, and bits [127:64] specify the linear address; bits [63:12] are reserved. - The types supported are: - 0) Individual-address invalidation: If "type" is 0, the logical processor invalidates mappings, except global translations, for the linear address and PCID specified in "descriptor". The instruction may also invalidate global translations, mappings for other linear addresses, or mappings tagged with other PCIDs. - 1) Single-context invalidation: If "type" is 1, the logical processor invalidates all mappings tagged with the PCID specified in "descriptor" except global translations. In some cases, it may invalidate mappings for other PCIDs as well. - 2) All-context invalidation: If "type" is 2, the logical processor invalidates all mappings tagged with any PCID. - 3) All-context invalidation, retaining global translations: If "type" is 3, the logical processor invalidates all mappings tagged with any PCID except global translations, ignoring "descriptor". The instruction may invalidate global translations as well. - -CASE type[1:0] OF -0: // individual-address invalidation retaining global translations - OP_PCID := MEM[descriptor+11:descriptor] - ADDR := MEM[descriptor+127:descriptor+64] - BREAK -1: // single PCID invalidation retaining globals - OP_PCID := MEM[descriptor+11:descriptor] - // invalidate all mappings tagged with OP_PCID except global translations - BREAK -2: // all PCID invalidation - // invalidate all mappings tagged with any PCID - BREAK -3: // all PCID invalidation retaining global translations - // invalidate all mappings tagged with any PCID except global translations - BREAK -ESAC - - - INVPCID -
immintrin.h
- OS-Targeted -
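This entry matches the _invpcid intrinsic in immintrin.h. An illustrative, kernel-mode-only sketch under that assumption (INVPCID faults outside ring 0, so this cannot run from user space); the descriptor layout follows the entry above:

/* Assumed intrinsic: _invpcid(unsigned int type, void *descriptor). */
#include <immintrin.h>
#include <stdint.h>

struct invpcid_desc {
    uint64_t pcid;           /* bits [11:0] = PCID, bits [63:12] reserved (0) */
    uint64_t linear_address; /* bits [127:64] of the 16-byte operand */
};

void flush_one_mapping(uint16_t pcid, void *addr) {
    struct invpcid_desc d = { (uint64_t)(pcid & 0xFFF), (uintptr_t)addr };
    _invpcid(0, &d); /* type 0: individual-address invalidation */
}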
- - - - Flag - - - - - Decrypt 10 rounds of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If an exception occurs, set the ZF flag to 1 and zero-initialize "__odata". - MEM[__odata+127:__odata] := AES128Decrypt (__idata[127:0], __h[383:0]) -dst := ZF - - - KEYLOCKER -
immintrin.h
- Cryptography -
- - Flag - - - - - Decrypt 14 rounds of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If an exception occurs, set the ZF flag to 1 and zero-initialize "__odata". - MEM[__odata+127:__odata] := AES256Decrypt (__idata[127:0], __h[511:0]) -dst := ZF - - - KEYLOCKER -
immintrin.h
- Cryptography -
- - Flag - - - - - Encrypt 10 rounds of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If an exception occurs, set the ZF flag to 1 and zero-initialize "__odata". - MEM[__odata+127:__odata] := AES128Encrypt (__idata[127:0], __h[383:0]) -dst := ZF - - - KEYLOCKER -
immintrin.h
- Cryptography -
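This entry matches the _mm_aesenc128kl_u8 intrinsic in immintrin.h. A minimal sketch under that assumption; the key handle "h" must first have been produced with _mm_encodekey128_u32 after the OS loaded an IWKey:

/* Assumed intrinsic: _mm_aesenc128kl_u8 (KEYLOCKER); compile with -mkl. */
#include <immintrin.h>

/* Returns the ZF status: nonzero means the handle was rejected and
   *out was zero-initialized. */
unsigned char encrypt_block(__m128i *out, __m128i in, const void *h) {
    return _mm_aesenc128kl_u8(out, in, h);
}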
- - Flag - - - - - Encrypt 14 rounds of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If an exception occurs, set the ZF flag to 1 and zero-initialize "__odata". - MEM[__odata+127:__odata] := AES256Encrypt (__idata[127:0], __h[511:0]) -dst := ZF - - - KEYLOCKER -
immintrin.h
- Cryptography -
- - Flag - - - - - Wrap a 128-bit AES key from "__key" into a 384-bit key handle stored in "__h", and set IWKey's NoBackup and KeySource bits in "dst". The explicit source operand "__htype" specifies restrictions on the handle "__h". - __h[383:0] := WrapKey128(__key[127:0], __htype) -dst[0] := IWKey.NoBackup -dst[4:1] := IWKey.KeySource[3:0] - - - KEYLOCKER -
immintrin.h
- Cryptography -
- - Flag - - - - - - Wrap a 256-bit AES key from "__key_hi" and "__key_lo" into a 512-bit key handle stored in "__h", and set IWKey's NoBackup and KeySource bits in "dst". The 32-bit "__htype" specifies restrictions on the handle "__h". - __h[511:0] := WrapKey256(__key_lo[127:0], __key_hi[127:0], __htype) -dst[0] := IWKey.NoBackup -dst[4:1] := IWKey.KeySource[3:0] - - - KEYLOCKER -
immintrin.h
- Cryptography -
- - Flag - - - - - - Load internal wrapping key (IWKey). The 32-bit unsigned integer "__ctl" specifies IWKey's KeySource and whether backing up the key is permitted. IWKey's 256-bit encryption key is loaded from "__enkey_lo" and "__enkey_hi". IWKey's 128-bit integrity key is loaded from "__intkey". - - KEYLOCKER -
immintrin.h
- Cryptography -
- - Flag - - - - - Decrypt 10 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If an exception occurs, set the ZF flag to 1 and zero-initialize "__odata". - FOR i := 0 to 7 - __odata[i] := AES128Decrypt (__idata[i], __h[383:0]) -ENDFOR -dst := ZF - - - KEYLOCKER_WIDE -
immintrin.h
- Cryptography -
- - Flag - - - - - Decrypt 14 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If an exception occurs, set the ZF flag to 1 and zero-initialize "__odata". - FOR i := 0 to 7 - __odata[i] := AES256Decrypt (__idata[i], __h[511:0]) -ENDFOR -dst := ZF - - - KEYLOCKER_WIDE -
immintrin.h
- Cryptography -
- - Flag - - - - - Encrypt 10 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 128-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If an exception occurs, set the ZF flag to 1 and zero-initialize "__odata". - FOR i := 0 to 7 - __odata[i] := AES128Encrypt (__idata[i], __h[383:0]) -ENDFOR -dst := ZF - - - KEYLOCKER_WIDE -
immintrin.h
- Cryptography -
- - Flag - - - - - Encrypt 14 rounds of 8 groups of unsigned 8-bit integers in "__idata" using 256-bit AES key specified in "__h", store the resulting unsigned 8-bit integers into the corresponding elements of "__odata", and set "dst" to the ZF flag status. If an exception occurs, set the ZF flag to 1 and zero-initialize "__odata". - FOR i := 0 to 7 - __odata[i] := AES256Encrypt (__idata[i], __h[511:0]) -ENDFOR -dst := ZF - - - KEYLOCKER_WIDE -
immintrin.h
- Cryptography -
- - - - - Count the number of leading zero bits in unsigned 32-bit integer "a", and return that count in "dst". - -tmp := 31 -dst := 0 -DO WHILE (tmp >= 0 AND a[tmp] == 0) - tmp := tmp - 1 - dst := dst + 1 -OD - - - LZCNT -
immintrin.h
- Bit Manipulation -
- - - - Count the number of leading zero bits in unsigned 64-bit integer "a", and return that count in "dst". - -tmp := 63 -dst := 0 -DO WHILE (tmp >= 0 AND a[tmp] == 0) - tmp := tmp - 1 - dst := dst + 1 -OD - - - LZCNT -
immintrin.h
- Bit Manipulation -
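These two entries match the _lzcnt_u32 and _lzcnt_u64 intrinsics in immintrin.h. A minimal sketch under that assumption; unlike BSR, the count is well defined for an input of 0:

/* Assumed intrinsics: _lzcnt_u32 / _lzcnt_u64; compile with -mlzcnt. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    printf("%u\n", _lzcnt_u32(1u));             /* 31 */
    printf("%u\n", _lzcnt_u32(0x80000000u));    /* 0 */
    printf("%u\n", _lzcnt_u32(0u));             /* 32 */
    printf("%u\n", (unsigned)_lzcnt_u64(1ull)); /* 63 */
    return 0;
}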
- - - - - - Copy 64-bit integer "a" to "dst". - -dst[63:0] := a[63:0] - - - MMX -
mmintrin.h
- Convert -
- - - - Copy 64-bit integer "a" to "dst". - -dst[63:0] := a[63:0] - - - MMX -
mmintrin.h
- Convert -
- - - - Copy 32-bit integer "a" to the lower element of "dst", and zero the upper element of "dst". - -dst[31:0] := a[31:0] -dst[63:32] := 0 - - - MMX -
mmintrin.h
- Convert -
- - - - Copy the lower 32-bit integer in "a" to "dst". - -dst[31:0] := a[31:0] - - - MMX -
mmintrin.h
- Convert -
- - - - Copy 32-bit integer "a" to the lower element of "dst", and zero the upper element of "dst". - -dst[31:0] := a[31:0] -dst[63:32] := 0 - - - MMX -
mmintrin.h
- Convert -
- - - - Copy the lower 32-bit integer in "a" to "dst". - -dst[31:0] := a[31:0] - - - MMX -
mmintrin.h
- Convert -
- - - - Copy 64-bit integer "a" to "dst". - -dst[63:0] := a[63:0] - - - MMX -
mmintrin.h
- Convert -
- - - - Copy 64-bit integer "a" to "dst". - -dst[63:0] := a[63:0] - - - MMX -
mmintrin.h
- Convert -
- - - - Empty the MMX state, which marks the x87 FPU registers as available for use by x87 instructions. This instruction must be used at the end of all MMX technology procedures. - - MMX -
mmintrin.h
- General Support -
- - - - Empty the MMX state, which marks the x87 FPU registers as available for use by x87 instructions. This instruction must be used at the end of all MMX technology procedures. - - MMX -
mmintrin.h
- General Support -
- - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". - -dst[7:0] := Saturate8(a[15:0]) -dst[15:8] := Saturate8(a[31:16]) -dst[23:16] := Saturate8(a[47:32]) -dst[31:24] := Saturate8(a[63:48]) -dst[39:32] := Saturate8(b[15:0]) -dst[47:40] := Saturate8(b[31:16]) -dst[55:48] := Saturate8(b[47:32]) -dst[63:56] := Saturate8(b[63:48]) - - - MMX -
mmintrin.h
- Miscellaneous -
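The signed-saturation pack above matches the _mm_packs_pi16 intrinsic in mmintrin.h. A minimal sketch under that assumption; values outside the int8 range saturate to 127/-128, and _mm_empty() must run before any later x87 floating-point code:

/* Assumed intrinsic: _mm_packs_pi16 (MMX); compile with -mmmx on x86-64. */
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
    __m64 a = _mm_set_pi16(300, -300, 42, -42); /* 300/-300 exceed int8 range */
    __m64 b = _mm_set_pi16(0, 0, 127, -128);
    long long bits = _mm_cvtm64_si64(_mm_packs_pi16(a, b));
    _mm_empty();
    printf("%016llx\n", (unsigned long long)bits); /* 00007f807f802ad6 */
    return 0;
}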
- - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". - -dst[15:0] := Saturate16(a[31:0]) -dst[31:16] := Saturate16(a[63:32]) -dst[47:32] := Saturate16(b[31:0]) -dst[63:48] := Saturate16(b[63:32]) - - - MMX -
mmintrin.h
- Miscellaneous -
- - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". - -dst[7:0] := SaturateU8(a[15:0]) -dst[15:8] := SaturateU8(a[31:16]) -dst[23:16] := SaturateU8(a[47:32]) -dst[31:24] := SaturateU8(a[63:48]) -dst[39:32] := SaturateU8(b[15:0]) -dst[47:40] := SaturateU8(b[31:16]) -dst[55:48] := SaturateU8(b[47:32]) -dst[63:56] := SaturateU8(b[63:48]) - - - MMX -
mmintrin.h
- Miscellaneous -
- - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". - -dst[7:0] := Saturate8(a[15:0]) -dst[15:8] := Saturate8(a[31:16]) -dst[23:16] := Saturate8(a[47:32]) -dst[31:24] := Saturate8(a[63:48]) -dst[39:32] := Saturate8(b[15:0]) -dst[47:40] := Saturate8(b[31:16]) -dst[55:48] := Saturate8(b[47:32]) -dst[63:56] := Saturate8(b[63:48]) - - - MMX -
mmintrin.h
- Miscellaneous -
- - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". - -dst[15:0] := Saturate16(a[31:0]) -dst[31:16] := Saturate16(a[63:32]) -dst[47:32] := Saturate16(b[31:0]) -dst[63:48] := Saturate16(b[63:32]) - - - MMX -
mmintrin.h
- Miscellaneous -
- - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". - -dst[7:0] := SaturateU8(a[15:0]) -dst[15:8] := SaturateU8(a[31:16]) -dst[23:16] := SaturateU8(a[47:32]) -dst[31:24] := SaturateU8(a[63:48]) -dst[39:32] := SaturateU8(b[15:0]) -dst[47:40] := SaturateU8(b[31:16]) -dst[55:48] := SaturateU8(b[47:32]) -dst[63:56] := SaturateU8(b[63:48]) - - - MMX -
mmintrin.h
- Miscellaneous -
- - - - - Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_BYTES(src1[63:0], src2[63:0]) { - dst[7:0] := src1[39:32] - dst[15:8] := src2[39:32] - dst[23:16] := src1[47:40] - dst[31:24] := src2[47:40] - dst[39:32] := src1[55:48] - dst[47:40] := src2[55:48] - dst[55:48] := src1[63:56] - dst[63:56] := src2[63:56] - RETURN dst[63:0] -} -dst[63:0] := INTERLEAVE_HIGH_BYTES(a[63:0], b[63:0]) - - - MMX -
mmintrin.h
- Swizzle -
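The high-half byte interleave above matches the _mm_unpackhi_pi8 intrinsic in mmintrin.h. A minimal sketch under that assumption:

/* Assumed intrinsic: _mm_unpackhi_pi8 (MMX).
   Result bytes, low to high: a4, b4, a5, b5, a6, b6, a7, b7.
   The caller is responsible for _mm_empty() before x87 code runs. */
#include <mmintrin.h>

__m64 interleave_high_bytes(__m64 a, __m64 b) {
    return _mm_unpackhi_pi8(a, b);
}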
- - - - - Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_WORDS(src1[63:0], src2[63:0]) { - dst[15:0] := src1[47:32] - dst[31:16] := src2[47:32] - dst[47:32] := src1[63:48] - dst[63:48] := src2[63:48] - RETURN dst[63:0] -} -dst[63:0] := INTERLEAVE_HIGH_WORDS(a[63:0], b[63:0]) - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst". - -dst[31:0] := a[63:32] -dst[63:32] := b[63:32] - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_BYTES(src1[63:0], src2[63:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - RETURN dst[63:0] -} -dst[63:0] := INTERLEAVE_BYTES(a[63:0], b[63:0]) - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_WORDS(src1[63:0], src2[63:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - RETURN dst[63:0] -} -dst[63:0] := INTERLEAVE_WORDS(a[63:0], b[63:0]) - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst". - -dst[31:0] := a[31:0] -dst[63:32] := b[31:0] - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_BYTES(src1[63:0], src2[63:0]) { - dst[7:0] := src1[39:32] - dst[15:8] := src2[39:32] - dst[23:16] := src1[47:40] - dst[31:24] := src2[47:40] - dst[39:32] := src1[55:48] - dst[47:40] := src2[55:48] - dst[55:48] := src1[63:56] - dst[63:56] := src2[63:56] - RETURN dst[63:0] -} -dst[63:0] := INTERLEAVE_HIGH_BYTES(a[63:0], b[63:0]) - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_WORDS(src1[63:0], src2[63:0]) { - dst[15:0] := src1[47:32] - dst[31:16] := src2[47:32] - dst[47:32] := src1[63:48] - dst[63:48] := src2[63:48] - RETURN dst[63:0] -} -dst[63:0] := INTERLEAVE_HIGH_WORDS(a[63:0], b[63:0]) - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst". - -dst[31:0] := a[63:32] -dst[63:32] := b[63:32] - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_BYTES(src1[63:0], src2[63:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - RETURN dst[63:0] -} -dst[63:0] := INTERLEAVE_BYTES(a[63:0], b[63:0]) - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_WORDS(src1[63:0], src2[63:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - RETURN dst[63:0] -} -dst[63:0] := INTERLEAVE_WORDS(a[63:0], b[63:0]) - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst". - -dst[31:0] := a[31:0] -dst[63:32] := b[31:0] - - - MMX -
mmintrin.h
- Swizzle -
- - - - - Add packed 8-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := a[i+7:i] + b[i+7:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed 16-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := a[i+15:i] + b[i+15:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := a[i+31:i] + b[i+31:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := a[i+7:i] - b[i+7:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := a[i+15:i] - b[i+15:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := a[i+31:i] - b[i+31:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
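The multiply-and-horizontally-add entry above matches the _mm_madd_pi16 intrinsic in mmintrin.h, a two-lane dot-product step. A minimal sketch under that assumption:

/* Assumed intrinsic: _mm_madd_pi16 (MMX). */
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
    __m64 a = _mm_set_pi16(4, 3, 2, 1);
    __m64 b = _mm_set_pi16(8, 7, 6, 5);
    __m64 r = _mm_madd_pi16(a, b); /* low lane 1*5 + 2*6 = 17, high lane 3*7 + 4*8 = 53 */
    int lo = _mm_cvtsi64_si32(r);  /* extract the low 32-bit lane */
    _mm_empty();
    printf("%d\n", lo); /* 17 */
    return 0;
}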
- - - - - Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". - -FOR j := 0 to 3 - i := j*16 - tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) - dst[i+15:i] := tmp[31:16] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". - -FOR j := 0 to 3 - i := j*16 - tmp[31:0] := a[i+15:i] * b[i+15:i] - dst[i+15:i] := tmp[15:0] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed 8-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := a[i+7:i] + b[i+7:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed 16-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := a[i+15:i] + b[i+15:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed 32-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := a[i+31:i] + b[i+31:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := a[i+7:i] - b[i+7:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := a[i+15:i] - b[i+15:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := a[i+31:i] - b[i+31:i] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]) -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst". - -FOR j := 0 to 3 - i := j*16 - tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]) - dst[i+15:i] := tmp[31:16] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
- - - - - Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst". - -FOR j := 0 to 3 - i := j*16 - tmp[31:0] := a[i+15:i] * b[i+15:i] - dst[i+15:i] := tmp[15:0] -ENDFOR - - - MMX -
mmintrin.h
- Arithmetic -
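The multiply entries above split a full 16x16 product across two intrinsics (high and low halves) and provide a fused multiply-add for dot products. A sketch of the latter, assuming the first entry corresponds to _mm_madd_pi16:

#include <mmintrin.h>

/* pmaddwd: multiplies four 16-bit lane pairs and adds adjacent
   products, yielding two 32-bit partial dot products. */
__m64 dot_pairs(__m64 a, __m64 b)
{
    return _mm_madd_pi16(a, b);
}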
- - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift 64-bit integer "a" left by "count" while shifting in zeros, and store the result in "dst". - -IF count[63:0] > 63 - dst[63:0] := 0 -ELSE - dst[63:0] := ZeroExtend64(a[63:0] << count[63:0]) -FI - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift 64-bit integer "a" left by "imm8" while shifting in zeros, and store the result in "dst". - -IF imm8[7:0] > 63 - dst[63:0] := 0 -ELSE - dst[63:0] := ZeroExtend64(a[63:0] << imm8[7:0]) -FI - - - MMX -
mmintrin.h
- Shift -
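The count and imm8 shift variants behave identically except for how the amount is supplied, and both zero every lane once the count exceeds the element width. A sketch assuming the shift-by-immediate entry is _mm_slli_pi16:

#include <mmintrin.h>

/* Multiply each 16-bit lane by 8 via a compile-time left shift;
   a count above 15 would zero all four lanes. */
__m64 times8(__m64 a)
{
    return _mm_slli_pi16(a, 3);
}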
- - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
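Because the arithmetic right shifts above replicate the sign bit, they act as a flooring divide by a power of two on signed lanes. Sketch (assumed name _mm_srai_pi16):

#include <mmintrin.h>

/* -16 >> 2 == -4 per lane; rounding is toward negative infinity,
   so -15 >> 2 also gives -4. */
__m64 quarter(__m64 a)
{
    return _mm_srai_pi16(a, 2);
}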
- - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift 64-bit integer "a" right by "count" while shifting in zeros, and store the result in "dst". - -IF count[63:0] > 63 - dst[63:0] := 0 -ELSE - dst[63:0] := ZeroExtend64(a[63:0] >> count[63:0]) -FI - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift 64-bit integer "a" right by "imm8" while shifting in zeros, and store the result in "dst". - -IF imm8[7:0] > 63 - dst[63:0] := 0 -ELSE - dst[63:0] := ZeroExtend64(a[63:0] >> imm8[7:0]) -FI - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift 64-bit integer "a" left by "count" while shifting in zeros, and store the result in "dst". - -IF count[63:0] > 63 - dst[63:0] := 0 -ELSE - dst[63:0] := ZeroExtend64(a[63:0] << count[63:0]) -FI - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift 64-bit integer "a" left by "imm8" while shifting in zeros, and store the result in "dst". - -IF imm8[7:0] > 63 - dst[63:0] := 0 -ELSE - dst[63:0] := ZeroExtend64(a[63:0] << imm8[7:0]) -FI - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI -ENDFOR - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift 64-bit integer "a" right by "count" while shifting in zeros, and store the result in "dst". - -IF count[63:0] > 63 - dst[63:0] := 0 -ELSE - dst[63:0] := ZeroExtend64(a[63:0] >> count[63:0]) -FI - - - MMX -
mmintrin.h
- Shift -
- - - - - Shift 64-bit integer "a" right by "imm8" while shifting in zeros, and store the result in "dst". - -IF imm8[7:0] > 63 - dst[63:0] := 0 -ELSE - dst[63:0] := ZeroExtend64(a[63:0] >> imm8[7:0]) -FI - - - MMX -
mmintrin.h
- Shift -
- - - - - Compute the bitwise AND of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[63:0] := (a[63:0] AND b[63:0]) - - - MMX -
mmintrin.h
- Logical -
- - - - - Compute the bitwise NOT of 64 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". - -dst[63:0] := ((NOT a[63:0]) AND b[63:0]) - - - MMX -
mmintrin.h
- Logical -
- - - - - Compute the bitwise OR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[63:0] := (a[63:0] OR b[63:0]) - - - MMX -
mmintrin.h
- Logical -
- - - - - Compute the bitwise XOR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[63:0] := (a[63:0] XOR b[63:0]) - - - MMX -
mmintrin.h
- Logical -
- - - - - Compute the bitwise AND of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[63:0] := (a[63:0] AND b[63:0]) - - - MMX -
mmintrin.h
- Logical -
- - - - - Compute the bitwise NOT of 64 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". - -dst[63:0] := ((NOT a[63:0]) AND b[63:0]) - - - MMX -
mmintrin.h
- Logical -
- - - - - Compute the bitwise OR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[63:0] := (a[63:0] OR b[63:0]) - - - MMX -
mmintrin.h
- Logical -
- - - - - Compute the bitwise XOR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[63:0] := (a[63:0] XOR b[63:0]) - - - MMX -
mmintrin.h
- Logical -
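The logical entries above compose into the classic bitwise-select idiom; the names used here (_mm_and_si64, _mm_andnot_si64, _mm_or_si64) are assumptions based on the descriptions:

#include <mmintrin.h>

/* Per-bit select: where mask is 1 take a, else take b. */
__m64 select_bits(__m64 mask, __m64 a, __m64 b)
{
    return _mm_or_si64(_mm_and_si64(mask, a),
                       _mm_andnot_si64(mask, b));
}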
- - - - - Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR - - - MMX -
mmintrin.h
- Compare -
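Since the compares above produce all-ones or all-zeros lane masks, they feed directly into the select idiom. A per-lane signed max, assuming the greater-than entry is _mm_cmpgt_pi16:

#include <mmintrin.h>

/* max(a, b) on four signed 16-bit lanes. */
__m64 max_pi16(__m64 a, __m64 b)
{
    __m64 gt = _mm_cmpgt_pi16(a, b); /* 0xFFFF where a > b, else 0 */
    return _mm_or_si64(_mm_and_si64(gt, a), _mm_andnot_si64(gt, b));
}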
- - - - Return vector of type __m64 with all elements set to zero. - -dst[MAX:0] := 0 - - - MMX -
mmintrin.h
- Set -
- - - - - Set packed 32-bit integers in "dst" with the supplied values. - -dst[31:0] := e0 -dst[63:32] := e1 - - MMX -
mmintrin.h
- Set -
- - - - - - - Set packed 16-bit integers in "dst" with the supplied values. - -dst[15:0] := e0 -dst[31:16] := e1 -dst[47:32] := e2 -dst[63:48] := e3 - - MMX -
mmintrin.h
- Set -
- - - - - - - - - - - Set packed 8-bit integers in "dst" with the supplied values. - -dst[7:0] := e0 -dst[15:8] := e1 -dst[23:16] := e2 -dst[31:24] := e3 -dst[39:32] := e4 -dst[47:40] := e5 -dst[55:48] := e6 -dst[63:56] := e7 - - MMX -
mmintrin.h
- Set -
- - - - Broadcast 32-bit integer "a" to all elements of "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := a[31:0] -ENDFOR - - MMX -
mmintrin.h
- Set -
 - - - - Broadcast 16-bit integer "a" to all elements of "dst". - 
FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := a[15:0]
ENDFOR
 - MMX - 
mmintrin.h
- Set -
- - - - Broadcast 8-bit integer "a" to all elements of "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := a[7:0] -ENDFOR - - MMX -
mmintrin.h
- Set -
- - - - - Set packed 32-bit integers in "dst" with the supplied values in reverse order. - -dst[31:0] := e1 -dst[63:32] := e0 - - MMX -
mmintrin.h
- Set -
- - - - - - - Set packed 16-bit integers in "dst" with the supplied values in reverse order. - -dst[15:0] := e3 -dst[31:16] := e2 -dst[47:32] := e1 -dst[63:48] := e0 - - MMX -
mmintrin.h
- Set -
- - - - - - - - - - - Set packed 8-bit integers in "dst" with the supplied values in reverse order. - -dst[7:0] := e7 -dst[15:8] := e6 -dst[23:16] := e5 -dst[31:24] := e4 -dst[39:32] := e3 -dst[47:40] := e2 -dst[55:48] := e1 -dst[63:56] := e0 - - MMX -
mmintrin.h
- Set -
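The set and setr entries take identical values in opposite argument order, which is a common source of bugs. A sketch assuming the names _mm_set_pi16 and _mm_setr_pi16:

#include <mmintrin.h>

void build_vectors(void)
{
    __m64 hi_first = _mm_set_pi16(4, 3, 2, 1);  /* arguments are e3..e0 */
    __m64 lo_first = _mm_setr_pi16(1, 2, 3, 4); /* arguments are e0..e3 */
    (void)hi_first; (void)lo_first;             /* identical contents   */
}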
- - - - - - - - Arm address monitoring hardware using the address specified in "p". A store to an address within the specified address range triggers the monitoring hardware. Specify optional extensions in "extensions", and optional hints in "hints". - - MONITOR -
pmmintrin.h
- General Support -
- - - - - Hint to the processor that it can enter an implementation-dependent-optimized state while waiting for an event or store operation to the address range specified by MONITOR. - - MONITOR -
pmmintrin.h
- General Support -
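A sketch of the MONITOR/MWAIT pairing just described, assuming the intrinsic names _mm_monitor and _mm_mwait; note the underlying instructions are privileged on most operating systems, so this is illustrative only:

#include <pmmintrin.h>

void wait_for_flag(volatile int *flag)
{
    while (*flag == 0) {
        _mm_monitor((const void *)flag, 0, 0); /* arm on flag's cache line */
        if (*flag == 0)                        /* re-check to avoid a race */
            _mm_mwait(0, 0);                   /* doze until the line is written */
    }
}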
- - - - - - Load 16 bits from memory, perform a byte swap operation, and store the result in "dst". - -FOR j := 0 to 1 - i := j*8 - dst[i+7:i] := MEM[ptr+15-i:ptr+8-i] -ENDFOR - - - MOVBE -
immintrin.h
- Load -
- - - - Load 32 bits from memory, perform a byte swap operation, and store the result in "dst". - -FOR j := 0 to 3 - i := j*8 - dst[i+7:i] := MEM[ptr+31-i:ptr+24-i] -ENDFOR - - - MOVBE -
immintrin.h
- Load -
- - - - Load 64 bits from memory, perform a byte swap operation, and store the result in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := MEM[ptr+63-i:ptr+56-i] -ENDFOR - - - MOVBE -
immintrin.h
- Load -
 - - - - - Perform a byte swap operation of the 16 bits in "data", and store the results to memory. - 
FOR j := 0 to 1
	i := j*8
	MEM[ptr+i+7:ptr+i] := data[15-i:8-i]
ENDFOR
 - - MOVBE - 
immintrin.h
- Store -
 - - - - - Perform a byte swap operation of the 32 bits in "data", and store the results to memory. - 
FOR j := 0 to 3
	i := j*8
	MEM[ptr+i+7:ptr+i] := data[31-i:24-i]
ENDFOR
 - - MOVBE - 
immintrin.h
- Store -
 - - - - - Perform a byte swap operation of the 64 bits in "data", and store the results to memory. - 
FOR j := 0 to 7
	i := j*8
	MEM[ptr+i+7:ptr+i] := data[63-i:56-i]
ENDFOR
 - - MOVBE - 
immintrin.h
- Store -
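Together the MOVBE entries above give cheap big-endian field access on a little-endian machine. A sketch assuming the names _loadbe_i32 and _storebe_i32:

#include <immintrin.h>
#include <stdint.h>

uint32_t get_be32(const void *p)       { return (uint32_t)_loadbe_i32(p); }
void     put_be32(void *p, uint32_t v) { _storebe_i32(p, (int)v); }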
- - - - - - - Move 64-byte (512-bit) value using direct store from source memory address "src" to destination memory address "dst". - -MEM[dst+511:dst] := MEM[src+511:src] - - - MOVDIR64B -
immintrin.h
- Store -
- - - - - - - Store 64-bit integer from "val" into memory using direct store. - -MEM[dst+63:dst] := val[63:0] - - - MOVDIRI -
immintrin.h
- Store -
- - - - - Store 32-bit integer from "val" into memory using direct store. - -MEM[dst+31:dst] := val[31:0] - - - MOVDIRI -
immintrin.h
- Store -
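A direct-store sketch for posting a 64-byte descriptor to device MMIO in a single write, assuming the name _movdir64b for the entry above; the destination must be 64-byte aligned:

#include <immintrin.h>

void post_descriptor(void *mmio_dst, const void *desc)
{
    _movdir64b(mmio_dst, desc); /* one 64-byte store, no read-for-ownership */
}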
- - - - - - - Make a pointer with the value of "srcmem" and bounds set to ["srcmem", "srcmem" + "size" - 1], and store the result in "dst". - dst := srcmem -dst.LB := srcmem.LB -dst.UB := srcmem + size - 1 - - - MPX -
immintrin.h
- Miscellaneous - -
- - - - - - Narrow the bounds for pointer "q" to the intersection of the bounds of "r" and the bounds ["q", "q" + "size" - 1], and store the result in "dst". - dst := q -IF r.LB > (q + size - 1) OR r.UB < q - dst.LB := 1 - dst.UB := 0 -ELSE - dst.LB := MAX(r.LB, q) - dst.UB := MIN(r.UB, (q + size - 1)) -FI - - MPX -
immintrin.h
- Miscellaneous - -
- - - - - Make a pointer with the value of "q" and bounds set to the bounds of "r" (e.g. copy the bounds of "r" to pointer "q"), and store the result in "dst". - dst := q -dst.LB := r.LB -dst.UB := r.UB - - MPX -
immintrin.h
- Miscellaneous - -
- - - - Make a pointer with the value of "q" and open bounds, which allow the pointer to access the entire virtual address space, and store the result in "dst". - dst := q -dst.LB := 0 -dst.UB := 0 - - MPX -
immintrin.h
- Miscellaneous - -
- - - - - Stores the bounds of "ptr_val" pointer in memory at address "ptr_addr". - MEM[ptr_addr].LB := ptr_val.LB -MEM[ptr_addr].UB := ptr_val.UB - - - MPX -
immintrin.h
- Miscellaneous - -
- - - - Checks if "q" is within its lower bound, and throws a #BR if not. - IF q < q.LB - #BR -FI - - - MPX -
immintrin.h
- Miscellaneous - -
- - - - Checks if "q" is within its upper bound, and throws a #BR if not. - IF q > q.UB - #BR -FI - - - - MPX -
immintrin.h
- Miscellaneous - -
- - - - - Checks if ["q", "q" + "size" - 1] is within the lower and upper bounds of "q" and throws a #BR if not. - IF (q + size - 1) < q.LB OR (q + size - 1) > q.UB - #BR -FI - - - - MPX -
immintrin.h
- Miscellaneous - -
- - - - Return the lower bound of "q". - dst := q.LB - - MPX -
immintrin.h
- Miscellaneous - -
- - - - Return the upper bound of "q". - dst := q.UB - - MPX -
immintrin.h
- Miscellaneous - -
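A bounds-checked access sketch built from the MPX entries above (assumed names _bnd_set_ptr_bounds and _bnd_chk_ptr_bounds; MPX is deprecated on current hardware, so treat this as historical):

#include <immintrin.h>
#include <stddef.h>

int read_checked(int *base, size_t n, size_t i)
{
    int *p = (int *)_bnd_set_ptr_bounds(base, n * sizeof(int));
    _bnd_chk_ptr_bounds(p + i, sizeof(int)); /* raises #BR when out of bounds */
    return p[i];
}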
- - - - - Set "dst" to the index of the lowest set bit in 32-bit integer "a". If no bits are set in "a" then "dst" is undefined. - -tmp := 0 -IF a == 0 - // dst is undefined -ELSE - DO WHILE ((tmp < 32) AND a[tmp] == 0) - tmp := tmp + 1 - OD -FI -dst := tmp - - -
immintrin.h
- Bit Manipulation -
- - - - Set "dst" to the index of the highest set bit in 32-bit integer "a". If no bits are set in "a" then "dst" is undefined. - -tmp := 31 -IF a == 0 - // dst is undefined -ELSE - DO WHILE ((tmp > 0) AND a[tmp] == 0) - tmp := tmp - 1 - OD -FI -dst := tmp - - -
immintrin.h
- Bit Manipulation -
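Because the result is undefined when the input is zero, guard the scan; a sketch assuming the names _bit_scan_forward and _bit_scan_reverse:

#include <immintrin.h>

int lowest_set(unsigned int a)  { return a ? _bit_scan_forward((int)a) : -1; }
int highest_set(unsigned int a) { return a ? _bit_scan_reverse((int)a) : -1; }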
 - - - - - Set "index" to the index of the lowest set bit in 32-bit integer "a". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. - 
tmp := 0
IF a == 0
	// MEM[index+31:index] is undefined
	dst := 0
ELSE
	DO WHILE ((tmp < 32) AND a[tmp] == 0)
		tmp := tmp + 1
	OD
	MEM[index+31:index] := tmp
	dst := 1
FI
 - 
immintrin.h
- Bit Manipulation -
 - - - - - Set "index" to the index of the highest set bit in 32-bit integer "a". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. - 
tmp := 31
IF a == 0
	// MEM[index+31:index] is undefined
	dst := 0
ELSE
	DO WHILE ((tmp > 0) AND a[tmp] == 0)
		tmp := tmp - 1
	OD
	MEM[index+31:index] := tmp
	dst := 1
FI
 - 
immintrin.h
- Bit Manipulation -
 - - - - - Set "index" to the index of the lowest set bit in 64-bit integer "a". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. - 
tmp := 0
IF a == 0
	// MEM[index+31:index] is undefined
	dst := 0
ELSE
	DO WHILE ((tmp < 64) AND a[tmp] == 0)
		tmp := tmp + 1
	OD
	MEM[index+31:index] := tmp
	dst := 1
FI
 - 
immintrin.h
- Bit Manipulation -
 - - - - - Set "index" to the index of the highest set bit in 64-bit integer "a". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1. - 
tmp := 63
IF a == 0
	// MEM[index+31:index] is undefined
	dst := 0
ELSE
	DO WHILE ((tmp > 0) AND a[tmp] == 0)
		tmp := tmp - 1
	OD
	MEM[index+31:index] := tmp
	dst := 1
FI
 - 
immintrin.h
- Bit Manipulation -
- - - - - Return the bit at index "b" of 32-bit integer "a". - -addr := a + ZeroExtend64(b) -dst[0] := MEM[addr] - - -
immintrin.h
- Bit Manipulation -
- - - - - Return the bit at index "b" of 32-bit integer "a", and set that bit to its complement. - -addr := a + ZeroExtend64(b) -dst[0] := MEM[addr] -MEM[addr] := ~dst[0] - - -
immintrin.h
- Bit Manipulation -
- - - - - Return the bit at index "b" of 32-bit integer "a", and set that bit to zero. - -addr := a + ZeroExtend64(b) -dst[0] := MEM[addr] -MEM[addr] := 0 - - -
immintrin.h
- Bit Manipulation -
- - - - - Return the bit at index "b" of 32-bit integer "a", and set that bit to one. - -addr := a + ZeroExtend64(b) -dst[0] := MEM[addr] -MEM[addr] := 1 - - -
immintrin.h
- Bit Manipulation -
- - - - - Return the bit at index "b" of 64-bit integer "a". - -addr := a + b -dst[0] := MEM[addr] - - -
immintrin.h
- Bit Manipulation -
- - - - - Return the bit at index "b" of 64-bit integer "a", and set that bit to its complement. - -addr := a + b -dst[0] := MEM[addr] -MEM[addr] := ~dst[0] - - -
immintrin.h
- Bit Manipulation -
- - - - - Return the bit at index "b" of 64-bit integer "a", and set that bit to zero. - -addr := a + b -dst[0] := MEM[addr] -MEM[addr] := 0 - - -
immintrin.h
- Bit Manipulation -
- - - - - Return the bit at index "b" of 64-bit integer "a", and set that bit to one. - -addr := a + b -dst[0] := MEM[addr] -MEM[addr] := 1 - - -
immintrin.h
- Bit Manipulation -
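Note that "a" in the bit-test entries above is an address, so these operate on memory. A test-and-set sketch assuming the name _bittestandset:

#include <immintrin.h>

/* Claim slot i in a bitmap word; returns the old bit, so 0 means
   the caller got the slot. Not atomic without a LOCK prefix. */
int claim_slot(long *bitmap, long i)
{
    return (int)_bittestandset(bitmap, i);
}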
- - - - Reverse the byte order of 32-bit integer "a", and store the result in "dst". This intrinsic is provided for conversion between little and big endian values. - -dst[7:0] := a[31:24] -dst[15:8] := a[23:16] -dst[23:16] := a[15:8] -dst[31:24] := a[7:0] - - -
immintrin.h
- Bit Manipulation -
- - - - Reverse the byte order of 64-bit integer "a", and store the result in "dst". This intrinsic is provided for conversion between little and big endian values. - -dst[7:0] := a[63:56] -dst[15:8] := a[55:48] -dst[23:16] := a[47:40] -dst[31:24] := a[39:32] -dst[39:32] := a[31:24] -dst[47:40] := a[23:16] -dst[55:48] := a[15:8] -dst[63:56] := a[7:0] - - -
immintrin.h
- Bit Manipulation -
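An endianness-conversion sketch built on the two byte-reversal entries above (assumed names _bswap and _bswap64):

#include <immintrin.h>
#include <stdint.h>

uint32_t to_be32(uint32_t v) { return (uint32_t)_bswap((int)v); }
uint64_t to_be64(uint64_t v) { return (uint64_t)_bswap64((long long)v); }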
- - - - Cast from type float to type unsigned __int32 without conversion. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. -
immintrin.h
- Cast -
- - - - Cast from type double to type unsigned __int64 without conversion. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. -
immintrin.h
- Cast -
- - - - Cast from type unsigned __int32 to type float without conversion. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. -
immintrin.h
- Cast -
- - - - Cast from type unsigned __int64 to type double without conversion. - This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. -
immintrin.h
- Cast -
- - - - - Shift the bits of unsigned long integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". - // size := 32 or 64 -dst := a -count := shift AND (size - 1) -DO WHILE (count > 0) - tmp[0] := dst[size - 1] - dst := (dst << 1) OR tmp[0] - count := count - 1 -OD - - - -
immintrin.h
- Shift -
- - - - - Shift the bits of unsigned long integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". - // size := 32 or 64 -dst := a -count := shift AND (size - 1) -DO WHILE (count > 0) - tmp[size - 1] := dst[0] - dst := (dst >> 1) OR tmp[size - 1] - count := count - 1 -OD - - -
immintrin.h
- Shift -
- - - - - Shift the bits of unsigned 32-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". - -dst := a -count := shift AND 31 -DO WHILE (count > 0) - tmp[0] := dst[31] - dst := (dst << 1) OR tmp[0] - count := count - 1 -OD - - -
immintrin.h
- Shift -
 - - - - - Shift the bits of unsigned 32-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". - 
dst := a
count := shift AND 31
DO WHILE (count > 0)
	tmp[31] := dst[0]
	dst := (dst >> 1) OR tmp[31]
	count := count - 1
OD
 - 
immintrin.h
- Shift -
- - - - - Shift the bits of unsigned 16-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". - -dst := a -count := shift AND 15 -DO WHILE (count > 0) - tmp[0] := dst[15] - dst := (dst << 1) OR tmp[0] - count := count - 1 -OD - - -
immintrin.h
- Shift -
 - - - - - Shift the bits of unsigned 16-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". - 
dst := a
count := shift AND 15
DO WHILE (count > 0)
	tmp[15] := dst[0]
	dst := (dst >> 1) OR tmp[15]
	count := count - 1
OD
 - 
immintrin.h
- Shift -
- - - - - Shift the bits of unsigned 64-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst". - -dst := a -count := shift AND 63 -DO WHILE (count > 0) - tmp[0] := dst[63] - dst := (dst << 1) OR tmp[0] - count := count - 1 -OD - - -
immintrin.h
- Shift -
- - - - - Shift the bits of unsigned 64-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst". - -dst := a -count := shift AND 63 -DO WHILE (count > 0) - tmp[63] := dst[0] - dst := (dst >> 1) OR tmp[63] - count := count - 1 -OD - - -
immintrin.h
- Shift -
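Per the pseudocode above, the rotate count is reduced modulo the operand width, so over-large counts are safe. Sketch (assumed names _rotl and _rotr):

#include <immintrin.h>

unsigned int rol32(unsigned int a, int n) { return _rotl(a, n); }
unsigned int ror32(unsigned int a, int n) { return _rotr(a, n); }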
- - - - Treat the processor-specific feature(s) specified in "a" as available. Multiple features may be OR'd together. See the valid feature flags below: - -_FEATURE_GENERIC_IA32 -_FEATURE_FPU -_FEATURE_CMOV -_FEATURE_MMX -_FEATURE_FXSAVE -_FEATURE_SSE -_FEATURE_SSE2 -_FEATURE_SSE3 -_FEATURE_SSSE3 -_FEATURE_SSE4_1 -_FEATURE_SSE4_2 -_FEATURE_MOVBE -_FEATURE_POPCNT -_FEATURE_PCLMULQDQ -_FEATURE_AES -_FEATURE_F16C -_FEATURE_AVX -_FEATURE_RDRND -_FEATURE_FMA -_FEATURE_BMI -_FEATURE_LZCNT -_FEATURE_HLE -_FEATURE_RTM -_FEATURE_AVX2 -_FEATURE_KNCNI -_FEATURE_AVX512F -_FEATURE_ADX -_FEATURE_RDSEED -_FEATURE_AVX512ER -_FEATURE_AVX512PF -_FEATURE_AVX512CD -_FEATURE_SHA -_FEATURE_MPX -_FEATURE_AVX512BW -_FEATURE_AVX512VL -_FEATURE_AVX512VBMI -_FEATURE_AVX512_4FMAPS -_FEATURE_AVX512_4VNNIW -_FEATURE_AVX512_VPOPCNTDQ -_FEATURE_AVX512_BITALG -_FEATURE_AVX512_VBMI2 -_FEATURE_GFNI -_FEATURE_VAES -_FEATURE_VPCLMULQDQ -_FEATURE_AVX512_VNNI -_FEATURE_CLWB -_FEATURE_RDPID -_FEATURE_IBT -_FEATURE_SHSTK -_FEATURE_SGX -_FEATURE_WBNOINVD -_FEATURE_PCONFIG -_FEATURE_AXV512_4VNNIB -_FEATURE_AXV512_4FMAPH -_FEATURE_AXV512_BITALG2 -_FEATURE_AXV512_VP2INTERSECT - -
immintrin.h
- General Support -
- - - - Dynamically query the processor to determine if the processor-specific feature(s) specified in "a" are available, and return true or false (1 or 0) if the set of features is available. Multiple features may be OR'd together. This function is limited to bitmask values in the first 'page' of the libirc cpu-id information. This intrinsic does not check the processor vendor. See the valid feature flags below: - -_FEATURE_GENERIC_IA32 -_FEATURE_FPU -_FEATURE_CMOV -_FEATURE_MMX -_FEATURE_FXSAVE -_FEATURE_SSE -_FEATURE_SSE2 -_FEATURE_SSE3 -_FEATURE_SSSE3 -_FEATURE_SSE4_1 -_FEATURE_SSE4_2 -_FEATURE_MOVBE -_FEATURE_POPCNT -_FEATURE_PCLMULQDQ -_FEATURE_AES -_FEATURE_F16C -_FEATURE_AVX -_FEATURE_RDRND -_FEATURE_FMA -_FEATURE_BMI -_FEATURE_LZCNT -_FEATURE_HLE -_FEATURE_RTM -_FEATURE_AVX2 -_FEATURE_KNCNI -_FEATURE_AVX512F -_FEATURE_ADX -_FEATURE_RDSEED -_FEATURE_AVX512ER -_FEATURE_AVX512PF -_FEATURE_AVX512CD -_FEATURE_SHA -_FEATURE_MPX -_FEATURE_AVX512BW -_FEATURE_AVX512VL -_FEATURE_AVX512VBMI -_FEATURE_AVX512_4FMAPS -_FEATURE_AVX512_4VNNIW -_FEATURE_AVX512_VPOPCNTDQ -_FEATURE_AVX512_BITALG -_FEATURE_AVX512_VBMI2 -_FEATURE_GFNI -_FEATURE_VAES -_FEATURE_VPCLMULQDQ -_FEATURE_AVX512_VNNI -_FEATURE_CLWB -_FEATURE_RDPID -_FEATURE_IBT -_FEATURE_SHSTK -_FEATURE_SGX -_FEATURE_WBNOINVD -_FEATURE_PCONFIG -_FEATURE_AXV512_4VNNIB -_FEATURE_AXV512_4FMAPH -_FEATURE_AXV512_BITALG2 -_FEATURE_AXV512_VP2INTERSECT -_FEATURE_AXV512_FP16 - -
immintrin.h
- General Support -
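A guarded-dispatch sketch using the feature-query entry above (assumed name _may_i_use_cpu_feature; this intrinsic is specific to Intel's compiler, and the _FEATURE_* masks are the ones listed):

#include <immintrin.h>

int have_avx2_fma(void)
{
    /* multiple features may be OR'd together, as described above */
    return _may_i_use_cpu_feature(_FEATURE_AVX2 | _FEATURE_FMA);
}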
- - - - - Dynamically query the processor to determine if the processor-specific feature(s) specified in "a" are available, and return true or false (1 or 0) if the set of features is available. Multiple features may be OR'd together. This works identically to the previous variant, except it also accepts a 'page' index that permits checking features on the 2nd page of the libirc information. When provided with a '0' in the 'page' parameter, this works identically to _may_i_use_cpu_feature. This intrinsic does not check the processor vendor. See the valid feature flags on the 2nd page below: (provided with a '1' in the 'page' parameter) - -_FEATURE_CLDEMOTE -_FEATURE_MOVDIRI -_FEATURE_MOVDIR64B -_FEATURE_WAITPKG -_FEATURE_AVX512_Bf16 -_FEATURE_ENQCMD -_FEATURE_AVX_VNNI -_FEATURE_AMX_TILE -_FEATURE_AMX_INT8 -_FEATURE_AMX_BF16 -_FEATURE_KL -_FEATURE_WIDE_KL -_FEATURE_HRESET -_FEATURE_UINTR -_FEATURE_PREFETCHI -_FEATURE_AVXVNNIINT8 -_FEATURE_CMPCCXADD -_FEATURE_AVXIFMA -_FEATURE_AVXNECONVERT -_FEATURE_RAOINT -_FEATURE_AMX_FP16 -_FEATURE_AMX_COMPLEX -_FEATURE_SHA512 -_FEATURE_SM3 -_FEATURE_SM4 -_FEATURE_AVXVNNIINT16 -_FEATURE_USERMSR -_FEATURE_AVX10_1_256 -_FEATURE_AVX10_1_512 -_FEATURE_APXF -_FEATURE_MSRLIST -_FEATURE_WRMSRNS -_FEATURE_PBNDKB - -
immintrin.h
- General Support -
 - - - - Dynamically query the processor to determine if the processor-specific feature(s) specified as a series of compile-time string literals in "feature, ..." are available, and return true or false (1 or 0) if the set of features is available. The feature names are converted to a bitmask and validated using the same infrastructure as _may_i_use_cpu_feature_ext. The behavior is otherwise the same as the previous variants. This intrinsic does not check the processor vendor. The supported string literals correspond one-to-one to the flags listed in the "Operation" sections of _may_i_use_cpu_feature and _may_i_use_cpu_feature_ext. Example string literals are "avx2", "bmi", "avx512fp16", "amx-int8"... - 
 - 
immintrin.h
- General Support -
 - - - - Read the Performance Monitor Counter (PMC) specified by "a", and store up to 64 bits in "dst". The width of performance counters is implementation-specific. - 
dst[63:0] := ReadPMC(a)
 - - 
immintrin.h
- General Support -
- - - - - - - Add unsigned 32-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry flag), and store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag). - -tmp[32:0] := a[31:0] + b[31:0] + (c_in > 0 ? 1 : 0) -MEM[out+31:out] := tmp[31:0] -dst[0] := tmp[32] -dst[7:1] := 0 - - -
immintrin.h
- Arithmetic -
- - - - - - - Add unsigned 64-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry flag), and store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag). - -tmp[64:0] := a[63:0] + b[63:0] + (c_in > 0 ? 1 : 0) -MEM[out+63:out] := tmp[63:0] -dst[0] := tmp[64] -dst[7:1] := 0 - - -
immintrin.h
- Arithmetic -
- - - - - - - Add unsigned 8-bit borrow "c_in" (carry flag) to unsigned 32-bit integer "b", and subtract the result from unsigned 32-bit integer "a". Store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag). - -tmp[32:0] := a[31:0] - (b[31:0] + (c_in > 0 ? 1 : 0)) -MEM[out+31:out] := tmp[31:0] -dst[0] := tmp[32] -dst[7:1] := 0 - - -
immintrin.h
- Arithmetic -
- - - - - - - Add unsigned 8-bit borrow "c_in" (carry flag) to unsigned 64-bit integer "b", and subtract the result from unsigned 64-bit integer "a". Store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag). - -tmp[64:0] := a[63:0] - (b[63:0] + (c_in > 0 ? 1 : 0)) -MEM[out+63:out] := tmp[63:0] -dst[0] := tmp[64] -dst[7:1] := 0 - - -
immintrin.h
- Arithmetic -
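Chaining the carry-out of one addition into the carry-in of the next gives multi-word arithmetic; a 128-bit add assuming the 64-bit entry above is _addcarry_u64:

#include <immintrin.h>

void add128(unsigned long long a[2], const unsigned long long b[2])
{
    unsigned char c = _addcarry_u64(0, a[0], b[0], &a[0]); /* low limb */
    (void)_addcarry_u64(c, a[1], b[1], &a[1]);             /* high limb + carry */
}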
- - - - Insert the 32-bit data from "a" into a Processor Trace stream via a PTW packet. The PTW packet will be inserted if tracing is currently enabled and ptwrite is currently enabled. The current IP will also be inserted via a FUP packet if FUPonPTW is enabled. - -
immintrin.h
- Miscellaneous -
- - - - Insert the 64-bit data from "a" into a Processor Trace stream via a PTW packet. The PTW packet will be inserted if tracing is currently enabled and ptwrite is currently enabled. The current IP will also be inserted via a FUP packet if FUPonPTW is enabled. - -
immintrin.h
- Miscellaneous -
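A tracing sketch for the PTWRITE entries above (assumed name _ptwrite64); the call is effectively a no-op unless Processor Trace and PTWRITE are both enabled:

#include <immintrin.h>

void trace_marker(unsigned long long tag)
{
    _ptwrite64(tag); /* emits a PTW packet carrying tag */
}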
- - - - - Invoke the Intel SGX enclave user (non-privilege) leaf function specified by "a", and return the error code. The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. - -
immintrin.h
- Miscellaneous -
- - - - - Invoke the Intel SGX enclave system (privileged) leaf function specified by "a", and return the error code. The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. - -
immintrin.h
- Miscellaneous -
- - - - - Invoke the Intel SGX enclave virtualized (VMM) leaf function specified by "a", and return the error code. The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. - -
immintrin.h
- Miscellaneous -
 - - - - Write back and flush internal caches. Initiate writing back and flushing of external caches. - 
 - 
immintrin.h
- Miscellaneous -
- - - - Convert the half-precision (16-bit) floating-point value "a" to a single-precision (32-bit) floating-point value, and store the result in "dst". - -dst[31:0] := Convert_FP16_To_FP32(a[15:0]) - -
emmintrin.h
- Convert -
- - - - - Convert the single-precision (32-bit) floating-point value "a" to a half-precision (16-bit) floating-point value, and store the result in "dst". - [round_note] - -dst[15:0] := Convert_FP32_To_FP16(a[31:0]) - -
emmintrin.h
- Convert -
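A scalar half/float round-trip using the two conversion entries above (assumed names _cvtsh_ss and _cvtss_sh; the float-to-half direction takes an explicit rounding mode, matching the [round_note]):

#include <immintrin.h>

float          half_to_float(unsigned short h) { return _cvtsh_ss(h); }
unsigned short float_to_half(float f)
{
    return _cvtss_sh(f, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}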
- - - - - - - Perform a carry-less multiplication of two 64-bit integers, selected from "a" and "b" according to "imm8", and store the results in "dst". - -IF (imm8[0] == 0) - TEMP1 := a[63:0] -ELSE - TEMP1 := a[127:64] -FI -IF (imm8[4] == 0) - TEMP2 := b[63:0] -ELSE - TEMP2 := b[127:64] -FI -FOR i := 0 to 63 - TEMP[i] := (TEMP1[0] and TEMP2[i]) - FOR j := 1 to i - TEMP[i] := TEMP[i] XOR (TEMP1[j] AND TEMP2[i-j]) - ENDFOR - dst[i] := TEMP[i] -ENDFOR -FOR i := 64 to 127 - TEMP[i] := 0 - FOR j := (i - 63) to 63 - TEMP[i] := TEMP[i] XOR (TEMP1[j] AND TEMP2[i-j]) - ENDFOR - dst[i] := TEMP[i] -ENDFOR -dst[127] := 0 - - - PCLMULQDQ -
wmmintrin.h
- Application-Targeted -
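A carry-less multiply sketch (assumed name _mm_clmulepi64_si128); per the description, imm8 bit 0 selects the half of "a" and bit 4 the half of "b", and the selector must be a compile-time constant:

#include <wmmintrin.h>

__m128i clmul_low(__m128i a, __m128i b)
{
    return _mm_clmulepi64_si128(a, b, 0x00); /* a[63:0] * b[63:0] */
}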
- - - - - - - Invoke the PCONFIG leaf function specified by "a". The "__data" array contains 3 32- or 64-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx. May return the value in eax, depending on the semantics of the specified leaf function. - - PCONFIG -
immintrin.h
- Miscellaneous -
- - - - - - Count the number of bits set to 1 in unsigned 32-bit integer "a", and return that count in "dst". - -dst := 0 -FOR i := 0 to 31 - IF a[i] - dst := dst + 1 - FI -ENDFOR - - - POPCNT -
immintrin.h
- Bit Manipulation -
- - - - Count the number of bits set to 1 in unsigned 64-bit integer "a", and return that count in "dst". - -dst := 0 -FOR i := 0 to 63 - IF a[i] - dst := dst + 1 - FI -ENDFOR - - - POPCNT -
immintrin.h
- Bit Manipulation -
- - - - Count the number of bits set to 1 in 32-bit integer "a", and return that count in "dst". - -dst := 0 -FOR i := 0 to 31 - IF a[i] - dst := dst + 1 - FI -ENDFOR - - - POPCNT -
immintrin.h
- Bit Manipulation -
- - - - Count the number of bits set to 1 in 64-bit integer "a", and return that count in "dst". - -dst := 0 -FOR i := 0 to 63 - IF a[i] - dst := dst + 1 - FI -ENDFOR - - - POPCNT -
immintrin.h
- Bit Manipulation -
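A typical POPCNT use, Hamming distance, assuming the 64-bit entry above is _mm_popcnt_u64:

#include <immintrin.h>

int hamming64(unsigned long long x, unsigned long long y)
{
    return (int)_mm_popcnt_u64(x ^ y); /* count of differing bits */
}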
 - - - - - - Load an instruction sequence containing the specified memory address into all levels of the cache hierarchy. - 
 - PREFETCHI - 
x86gprintrin.h
- General Support -
 - - - - Load an instruction sequence containing the specified memory address into all but the first-level cache. - 
 - PREFETCHI - 
x86gprintrin.h
- General Support -
 - - - - - Fetch the line of data from memory that contains address "p" to a location in the cache hierarchy specified by the locality hint "i", which can be one of:
_MM_HINT_ET0 // 7, move data using the ET0 hint. The PREFETCHW instruction will be generated.
_MM_HINT_T0 // 3, move data using the T0 hint. The PREFETCHT0 instruction will be generated.
_MM_HINT_T1 // 2, move data using the T1 hint. The PREFETCHT1 instruction will be generated.
_MM_HINT_T2 // 1, move data using the T2 hint. The PREFETCHT2 instruction will be generated.
_MM_HINT_NTA // 0, move data using the non-temporal access (NTA) hint. The PREFETCHNTA instruction will be generated.
 - - - - - - - PRFCHW - 
immintrin.h
- General Support -
- - - - - Atomically add a 32-bit value at memory operand "__A" and a 32-bit "__B", and store the result to the same memory location. - - -MEM[__A+31:__A] := MEM[__A+31:__A] + __B[31:0] - - - - RAO_INT -
x86gprintrin.h
- Arithmetic -
- - - Atomically add a 64-bit value at memory operand "__A" and a 64-bit "__B", and store the result to the same memory location. - - -MEM[__A+63:__A] := MEM[__A+63:__A] + __B[63:0] - - - - RAO_INT -
x86gprintrin.h
- Arithmetic -
 - - - Atomically AND the 32-bit value at memory operand "__A" with the 32-bit "__B", and store the result to the same memory location. - 
MEM[__A+31:__A] := MEM[__A+31:__A] AND __B[31:0]
 - - - RAO_INT - 
x86gprintrin.h
- Arithmetic -
 - - - Atomically AND the 64-bit value at memory operand "__A" with the 64-bit "__B", and store the result to the same memory location. - 
MEM[__A+63:__A] := MEM[__A+63:__A] AND __B[63:0]
 - - - RAO_INT - 
x86gprintrin.h
- Arithmetic -
 - - - Atomically OR the 32-bit value at memory operand "__A" with the 32-bit "__B", and store the result to the same memory location. - 
MEM[__A+31:__A] := MEM[__A+31:__A] OR __B[31:0]
 - - - RAO_INT - 
x86gprintrin.h
- Arithmetic -
 - - - Atomically OR the 64-bit value at memory operand "__A" with the 64-bit "__B", and store the result to the same memory location. - 
MEM[__A+63:__A] := MEM[__A+63:__A] OR __B[63:0]
 - - - RAO_INT - 
x86gprintrin.h
- Arithmetic -
 - - - Atomically XOR the 32-bit value at memory operand "__A" with the 32-bit "__B", and store the result to the same memory location. - 
MEM[__A+31:__A] := MEM[__A+31:__A] XOR __B[31:0]
 - - - RAO_INT - 
x86gprintrin.h
- Arithmetic -
 - - - Atomically XOR the 64-bit value at memory operand "__A" with the 64-bit "__B", and store the result to the same memory location. - 
MEM[__A+63:__A] := MEM[__A+63:__A] XOR __B[63:0]
 - - - RAO_INT - 
x86gprintrin.h
- Arithmetic -
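Unlike LOCK XADD, the remote-atomic entries above return nothing, which suits fire-and-forget counters. Sketch (assumed name _aadd_i32 for the 32-bit add):

#include <x86gprintrin.h>

void bump_counter(int *counter)
{
    _aadd_i32(counter, 1); /* atomic add; no old value comes back */
}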
- - - - Copy the IA32_TSC_AUX MSR (signature value) into "dst". - dst[31:0] := IA32_TSC_AUX[31:0] - - - RDPID -
immintrin.h
- General Support -
- - - - - - Read a hardware generated 16-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. - IF HW_RND_GEN.ready == 1 - val[15:0] := HW_RND_GEN.data - dst := 1 -ELSE - val[15:0] := 0 - dst := 0 -FI - - - RDRAND -
immintrin.h
- Random -
- - - - Read a hardware generated 32-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. - IF HW_RND_GEN.ready == 1 - val[31:0] := HW_RND_GEN.data - dst := 1 -ELSE - val[31:0] := 0 - dst := 0 -FI - - - RDRAND -
immintrin.h
- Random -
- - - - Read a hardware generated 64-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. - IF HW_RND_GEN.ready == 1 - val[63:0] := HW_RND_GEN.data - dst := 1 -ELSE - val[63:0] := 0 - dst := 0 -FI - - - RDRAND -
immintrin.h
- Random -
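Because RDRAND can transiently report not-ready, callers retry a bounded number of times; a sketch assuming the 32-bit entry above is _rdrand32_step:

#include <immintrin.h>

int random_u32(unsigned int *out)
{
    for (int i = 0; i < 16; i++)
        if (_rdrand32_step(out)) /* returns 1 on success */
            return 1;
    return 0;                    /* no entropy available right now */
}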
 - - - - - - Read a 16-bit NIST SP800-90B and SP800-90C compliant random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. - 
IF HW_NRND_GEN.ready == 1
	val[15:0] := HW_NRND_GEN.data
	dst := 1
ELSE
	val[15:0] := 0
	dst := 0
FI
 - - RDSEED - 
immintrin.h
- Random -
 - - - - Read a 32-bit NIST SP800-90B and SP800-90C compliant random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. - 
IF HW_NRND_GEN.ready == 1
	val[31:0] := HW_NRND_GEN.data
	dst := 1
ELSE
	val[31:0] := 0
	dst := 0
FI
 - - RDSEED - 
immintrin.h
- Random -
 - - - - Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise. - 
IF HW_NRND_GEN.ready == 1
	val[63:0] := HW_NRND_GEN.data
	dst := 1
ELSE
	val[63:0] := 0
	dst := 0
FI
 - - RDSEED - 
immintrin.h
- Random -
- - - - - - Copy the current 64-bit value of the processor's time-stamp counter into "dst", and store the IA32_TSC_AUX MSR (signature value) into memory at "mem_addr". - dst[63:0] := TimeStampCounter -MEM[mem_addr+31:mem_addr] := IA32_TSC_AUX[31:0] - - - RDTSCP -
immintrin.h
- General Support -
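A sketch of the entry above (assumed name __rdtscp), which also reveals which core the read executed on via the IA32_TSC_AUX signature:

#include <immintrin.h>

unsigned long long stamp(unsigned int *core_sig)
{
    return __rdtscp(core_sig); /* core_sig receives IA32_TSC_AUX */
}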
 - - - - - - Force an RTM abort. The EAX register is updated to reflect that an XABORT instruction caused the abort, and the "imm8" parameter will be provided in bits [31:24] of EAX. Following an RTM abort, the logical processor resumes execution at the fallback address computed through the outermost XBEGIN instruction. - 
IF RTM_ACTIVE == 0
	// nop
ELSE
	// restore architectural register state
	// discard memory updates performed in transaction
	// update EAX with status and imm8 value
	eax[31:24] := imm8[7:0]
	RTM_NEST_COUNT := 0
	RTM_ACTIVE := 0
	IF _64_BIT_MODE
		RIP := fallbackRIP
	ELSE
		EIP := fallbackEIP
	FI
FI
 - - RTM - 
immintrin.h
- General Support -
 - - - - Specify the start of an RTM code region. If the logical processor was not already in transactional execution, then this call causes the logical processor to transition into transactional execution. On an RTM abort, the logical processor discards all architectural register and memory updates performed during the RTM execution, restores architectural state, and starts execution beginning at the fallback address computed from the outermost XBEGIN instruction. Return status of ~0 (0xFFFFFFFF) if continuing inside transaction; all other codes are aborts. - 
IF RTM_NEST_COUNT < MAX_RTM_NEST_COUNT
	RTM_NEST_COUNT := RTM_NEST_COUNT + 1
	IF RTM_NEST_COUNT == 1
		IF _64_BIT_MODE
			fallbackRIP := RIP
		ELSE IF _32_BIT_MODE
			fallbackEIP := EIP
		FI
		RTM_ACTIVE := 1
		// enter RTM execution, record register state, start tracking memory state
	FI
ELSE
	// RTM abort (see _xabort)
FI
 - - RTM - 
immintrin.h
- General Support -
- - - - Specify the end of an RTM code region. - If this corresponds to the outermost scope, the logical processor will attempt to commit the logical processor state atomically. - If the commit fails, the logical processor will perform an RTM abort. - IF RTM_ACTIVE == 1 - RTM_NEST_COUNT := RTM_NEST_COUNT - 1 - IF RTM_NEST_COUNT == 0 - // try to commit transaction - IF FAIL_TO_COMMIT_TRANSACTION - // RTM abort (see _xabort) - ELSE - RTM_ACTIVE := 0 - FI - FI -FI - - - RTM -
immintrin.h
- General Support -
- - - - Query the transactional execution status, return 1 if inside a transactionally executing RTM or HLE region, and return 0 otherwise. - IF (RTM_ACTIVE == 1 OR HLE_ACTIVE == 1) - dst := 1 -ELSE - dst := 0 -FI - - - RTM -
immintrin.h
- General Support -
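A transactional update with a non-transactional fallback, combining the four RTM entries above (assumed names _xbegin and _xend; ~0, i.e. 0xFFFFFFFF, is the "started" status):

#include <immintrin.h>

void increment(int *counter)
{
    unsigned int status = _xbegin();
    if (status == 0xFFFFFFFFu) {          /* inside the transaction */
        ++*counter;
        _xend();                          /* attempt to commit */
    } else {
        __sync_fetch_and_add(counter, 1); /* aborted: atomic fallback */
    }
}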
- - - - - Serialize instruction execution, ensuring all modifications to flags, registers, and memory by previous instructions are completed before the next instruction is fetched. - - SERIALIZE -
immintrin.h
- General Support -
- - - - - - - Perform an intermediate calculation for the next four SHA1 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst". - -W0 := a[127:96] -W1 := a[95:64] -W2 := a[63:32] -W3 := a[31:0] -W4 := b[127:96] -W5 := b[95:64] -dst[127:96] := W2 XOR W0 -dst[95:64] := W3 XOR W1 -dst[63:32] := W4 XOR W2 -dst[31:0] := W5 XOR W3 - - - SHA -
immintrin.h
- Cryptography -
- - - - - Perform the final calculation for the next four SHA1 message values (unsigned 32-bit integers) using the intermediate result in "a" and the previous message values in "b", and store the result in "dst". - -W13 := b[95:64] -W14 := b[63:32] -W15 := b[31:0] -W16 := (a[127:96] XOR W13) <<< 1 -W17 := (a[95:64] XOR W14) <<< 1 -W18 := (a[63:32] XOR W15) <<< 1 -W19 := (a[31:0] XOR W16) <<< 1 -dst[127:96] := W16 -dst[95:64] := W17 -dst[63:32] := W18 -dst[31:0] := W19 - - - SHA -
immintrin.h
- Cryptography -
- - - - - Calculate SHA1 state variable E after four rounds of operation from the current SHA1 state variable "a", add that value to the scheduled values (unsigned 32-bit integers) in "b", and store the result in "dst". - -tmp := (a[127:96] <<< 30) -dst[127:96] := b[127:96] + tmp -dst[95:64] := b[95:64] -dst[63:32] := b[63:32] -dst[31:0] := b[31:0] - - - SHA -
immintrin.h
- Cryptography -
- - - - - - Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) from "a" and some pre-computed sum of the next 4 round message values (unsigned 32-bit integers), and state variable E from "b", and store the updated SHA1 state (A,B,C,D) in "dst". "func" contains the logic functions and round constants. - IF (func[1:0] == 0) - f := f0() - K := K0 -ELSE IF (func[1:0] == 1) - f := f1() - K := K1 -ELSE IF (func[1:0] == 2) - f := f2() - K := K2 -ELSE IF (func[1:0] == 3) - f := f3() - K := K3 -FI -A := a[127:96] -B := a[95:64] -C := a[63:32] -D := a[31:0] -W[0] := b[127:96] -W[1] := b[95:64] -W[2] := b[63:32] -W[3] := b[31:0] -A[1] := f(B, C, D) + (A <<< 5) + W[0] + K -B[1] := A -C[1] := B <<< 30 -D[1] := C -E[1] := D -FOR i := 1 to 3 - A[i+1] := f(B[i], C[i], D[i]) + (A[i] <<< 5) + W[i] + E[i] + K - B[i+1] := A[i] - C[i+1] := B[i] <<< 30 - D[i+1] := C[i] - E[i+1] := D[i] -ENDFOR -dst[127:96] := A[4] -dst[95:64] := B[4] -dst[63:32] := C[4] -dst[31:0] := D[4] - - - SHA -
immintrin.h
- Cryptography -
- - - - - Perform an intermediate calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst". - W4 := b[31:0] -W3 := a[127:96] -W2 := a[95:64] -W1 := a[63:32] -W0 := a[31:0] -dst[127:96] := W3 + sigma0(W4) -dst[95:64] := W2 + sigma0(W3) -dst[63:32] := W1 + sigma0(W2) -dst[31:0] := W0 + sigma0(W1) - - - SHA -
immintrin.h
- Cryptography -
 - - - - - Perform the final calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst". - 
W14 := b[95:64]
W15 := b[127:96]
W16 := a[31:0] + sigma1(W14)
W17 := a[63:32] + sigma1(W15)
W18 := a[95:64] + sigma1(W16)
W19 := a[127:96] + sigma1(W17)
dst[127:96] := W19
dst[95:64] := W18
dst[63:32] := W17
dst[31:0] := W16
 - - SHA - 
immintrin.h
- Cryptography -
- - - - - - Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from "a", an initial SHA256 state (A,B,E,F) from "b", and a pre-computed sum of the next 2 round message values (unsigned 32-bit integers) and the corresponding round constants from "k", and store the updated SHA256 state (A,B,E,F) in "dst". - A[0] := b[127:96] -B[0] := b[95:64] -C[0] := a[127:96] -D[0] := a[95:64] -E[0] := b[63:32] -F[0] := b[31:0] -G[0] := a[63:32] -H[0] := a[31:0] -W_K[0] := k[31:0] -W_K[1] := k[63:32] -FOR i := 0 to 1 - A[i+1] := Ch(E[i], F[i], G[i]) + sum1(E[i]) + W_K[i] + H[i] + Maj(A[i], B[i], C[i]) + sum0(A[i]) - B[i+1] := A[i] - C[i+1] := B[i] - D[i+1] := C[i] - E[i+1] := Ch(E[i], F[i], G[i]) + sum1(E[i]) + W_K[i] + H[i] + D[i] - F[i+1] := E[i] - G[i+1] := F[i] - H[i+1] := G[i] -ENDFOR -dst[127:96] := A[2] -dst[95:64] := B[2] -dst[63:32] := E[2] -dst[31:0] := F[2] - - - SHA -
immintrin.h
- Cryptography -
 - - - - - This intrinsic is one of the two SHA512 message scheduling instructions. The intrinsic performs an intermediate calculation for the next four SHA512 message qwords. The calculated results are stored in "dst". - 
DEFINE ROR64(qword, n) {
	count := n % 64
	dest := (qword >> count) | (qword << (64 - count))
	RETURN dest
}
DEFINE SHR64(qword, n) {
	RETURN qword >> n
}
DEFINE s0(qword) {
	RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7)
}
W.qword[4] := __B.qword[0]
W.qword[3] := __A.qword[3]
W.qword[2] := __A.qword[2]
W.qword[1] := __A.qword[1]
W.qword[0] := __A.qword[0]
dst.qword[3] := W.qword[3] + s0(W.qword[4])
dst.qword[2] := W.qword[2] + s0(W.qword[3])
dst.qword[1] := W.qword[1] + s0(W.qword[2])
dst.qword[0] := W.qword[0] + s0(W.qword[1])
 - - - SHA512 - AVX - 
immintrin.h
- Cryptography -
 - - - This intrinsic is one of the two SHA512 message scheduling instructions. The intrinsic performs the final calculation for the next four SHA512 message qwords. The calculated results are stored in "dst". - 
DEFINE ROR64(qword, n) {
	count := n % 64
	dest := (qword >> count) | (qword << (64 - count))
	RETURN dest
}
DEFINE SHR64(qword, n) {
	RETURN qword >> n
}
DEFINE s1(qword) {
	RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6)
}
W.qword[14] := __B.qword[2]
W.qword[15] := __B.qword[3]
W.qword[16] := __A.qword[0] + s1(W.qword[14])
W.qword[17] := __A.qword[1] + s1(W.qword[15])
W.qword[18] := __A.qword[2] + s1(W.qword[16])
W.qword[19] := __A.qword[3] + s1(W.qword[17])
dst.qword[3] := W.qword[19]
dst.qword[2] := W.qword[18]
dst.qword[1] := W.qword[17]
dst.qword[0] := W.qword[16]
 - - - SHA512 - AVX - 
immintrin.h
- Cryptography -
 - - - This intrinsic performs two rounds of SHA512 operation using initial SHA512 state (C,D,G,H) from "__A", an initial SHA512 state (A,B,E,F) from "__B", and a pre-computed sum of the next two round message qwords and the corresponding round constants from "__C" (only the two lower qwords of the third operand). The updated SHA512 state (A,B,E,F) is written to "dst", and "dst" can be used as the updated state (C,D,G,H) in later rounds. - 
DEFINE ROR64(qword, n) {
	count := n % 64
	dest := (qword >> count) | (qword << (64 - count))
	RETURN dest
}
DEFINE SHR64(qword, n) {
	RETURN qword >> n
}
DEFINE cap_sigma0(qword) {
	RETURN ROR64(qword, 28) ^ ROR64(qword, 34) ^ ROR64(qword, 39)
}
DEFINE cap_sigma1(qword) {
	RETURN ROR64(qword, 14) ^ ROR64(qword, 18) ^ ROR64(qword, 41)
}
DEFINE MAJ(a,b,c) {
	RETURN (a & b) ^ (a & c) ^ (b & c)
}
DEFINE CH(a,b,c) {
	RETURN (a & b) ^ (c & ~a)
}
A.qword[0] := __B.qword[3]
B.qword[0] := __B.qword[2]
C.qword[0] := __A.qword[3]
D.qword[0] := __A.qword[2]
E.qword[0] := __B.qword[1]
F.qword[0] := __B.qword[0]
G.qword[0] := __A.qword[1]
H.qword[0] := __A.qword[0]
WK.qword[0]:= __C.qword[0]
WK.qword[1]:= __C.qword[1]
FOR i := 0 to 1
	A.qword[i+1] := CH(E.qword[i], F.qword[i], G.qword[i]) + cap_sigma1(E.qword[i]) + WK.qword[i] + H.qword[i] + MAJ(A.qword[i], B.qword[i], C.qword[i]) + cap_sigma0(A.qword[i])
	B.qword[i+1] := A.qword[i]
	C.qword[i+1] := B.qword[i]
	D.qword[i+1] := C.qword[i]
	E.qword[i+1] := CH(E.qword[i], F.qword[i], G.qword[i]) + cap_sigma1(E.qword[i]) + WK.qword[i] + H.qword[i] + D.qword[i]
	F.qword[i+1] := E.qword[i]
	G.qword[i+1] := F.qword[i]
	H.qword[i+1] := G.qword[i]
ENDFOR
dst.qword[3] := A.qword[2]
dst.qword[2] := B.qword[2]
dst.qword[1] := E.qword[2]
dst.qword[0] := F.qword[2]
 - - - - SHA512 - AVX - 
immintrin.h
- Cryptography -
- - - The VSM3MSG1 intrinsic is one of the two SM3 message scheduling intrinsics. The intrinsic performs an initial calculation for the next four SM3 message words. The calculated results are stored in "dst". - - -DEFINE ROL32(dword, n) { - count := n % 32 - dest := (dword << count) | (dword >> (32 - count)) - RETURN dest -} -DEFINE P1(x) { - RETURN x ^ ROL32(x, 15) ^ ROL32(x, 23) -} -W.dword[0] := __C.dword[0] -W.dword[1] := __C.dword[1] -W.dword[2] := __C.dword[2] -W.dword[3] := __C.dword[3] -W.dword[7] := __A.dword[0] -W.dword[8] := __A.dword[1] -W.dword[9] := __A.dword[2] -W.dword[10] := __A.dword[3] -W.dword[13] := __B.dword[0] -W.dword[14] := __B.dword[1] -W.dword[15] := __B.dword[2] -TMP0 := W.dword[7] ^ W.dword[0] ^ ROL32(W.dword[13], 15) -TMP1 := W.dword[8] ^ W.dword[1] ^ ROL32(W.dword[14], 15) -TMP2 := W.dword[9] ^ W.dword[2] ^ ROL32(W.dword[15], 15) -TMP3 := W.dword[10] ^ W.dword[3] -dst.dword[0] := P1(TMP0) -dst.dword[1] := P1(TMP1) -dst.dword[2] := P1(TMP2) -dst.dword[3] := P1(TMP3) - - - - - SM3 - AVX -
immintrin.h
- Cryptography -
- - - The VSM3MSG2 intrinsic is one of the two SM3 message scheduling intrinsics. The intrinsic performs the final calculation for the next four SM3 message words. The calculated results are stored in "dst". - - -DEFINE ROL32(dword, n) { - count := n % 32 - dest := (dword << count) | (dword >> (32-count)) - RETURN dest -} -WTMP.dword[0] := __A.dword[0] -WTMP.dword[1] := __A.dword[1] -WTMP.dword[2] := __A.dword[2] -WTMP.dword[3] := __A.dword[3] -W.dword[3] := __B.dword[0] -W.dword[4] := __B.dword[1] -W.dword[5] := __B.dword[2] -W.dword[6] := __B.dword[3] -W.dword[10] := __C.dword[0] -W.dword[11] := __C.dword[1] -W.dword[12] := __C.dword[2] -W.dword[13] := __C.dword[3] -W.dword[16] := ROL32(W.dword[3], 7) ^ W.dword[10] ^ WTMP.dword[0] -W.dword[17] := ROL32(W.dword[4], 7) ^ W.dword[11] ^ WTMP.dword[1] -W.dword[18] := ROL32(W.dword[5], 7) ^ W.dword[12] ^ WTMP.dword[2] -W.dword[19] := ROL32(W.dword[6], 7) ^ W.dword[13] ^ WTMP.dword[3] -W.dword[19] := W.dword[19] ^ ROL32(W.dword[16], 6) ^ ROL32(W.dword[16], 15) ^ ROL32(W.dword[16], 30) -dst.dword[0] := W.dword[16] -dst.dword[1] := W.dword[17] -dst.dword[2] := W.dword[18] -dst.dword[3] := W.dword[19] - - - - - SM3 - AVX -
immintrin.h
- Cryptography -
- - - The intrinsic performs two rounds of SM3 operation using an initial SM3 state (C, D, G, H) from "__A", an initial SM3 state (A, B, E, F) from "__B", and pre-computed message words from "__C". "__A", with the initial SM3 state (C, D, G, H), assumes input of the non-rotated-left variables from the previous state. The updated SM3 state (A, B, E, F) is written to "dst". The "imm8" should contain the even round number for the first of the two rounds computed by this instruction. The computation masks the "imm8" value by ANDing it with 0x3E so that only even round numbers from 0 through 62 are used for this operation. The calculated results are stored in "dst". - - -DEFINE ROL32(dword, n) { - count := n % 32 - dest := (dword << count) | (dword >> (32-count)) - RETURN dest -} -DEFINE P0(x) { - RETURN x ^ ROL32(x, 9) ^ ROL32(x, 17) -} -DEFINE FF(x, y, z, round) { - IF round < 16 - RETURN (x ^ y ^ z) - ELSE - RETURN (x & y) | (x & z) | (y & z) - FI -} -DEFINE GG(x, y, z, round){ - IF round < 16 - RETURN (x ^ y ^ z) - ELSE - RETURN (x & y) | (~x & z) - FI -} -A.dword[0] := __B.dword[3] -B.dword[0] := __B.dword[2] -C.dword[0] := __A.dword[3] -D.dword[0] := __A.dword[2] -E.dword[0] := __B.dword[1] -F.dword[0] := __B.dword[0] -G.dword[0] := __A.dword[1] -H.dword[0] := __A.dword[0] -W.dword[0] := __C.dword[0] -W.dword[1] := __C.dword[1] -W.dword[4] := __C.dword[2] -W.dword[5] := __C.dword[3] -C.dword[0] := ROL32(C.dword[0], 9) -D.dword[0] := ROL32(D.dword[0], 9) -G.dword[0] := ROL32(G.dword[0], 19) -H.dword[0] := ROL32(H.dword[0], 19) -ROUND := imm8 & 0x3E -IF ROUND < 16 - CONST.dword[0] := 0x79CC4519 -ELSE - CONST.dword[0] := 0x7A879D8A -FI -CONST.dword[0] := ROL32(CONST.dword[0], ROUND) -FOR i:= 0 to 1 - temp.dword[0] := ROL32(A.dword[i], 12) + E.dword[i] + CONST.dword[0] - S1.dword[0] := ROL32(temp.dword[0], 7) - S2.dword[0] := S1.dword[0] ^ ROL32(A.dword[i], 12) - T1.dword[0] := FF(A.dword[i], B.dword[i], C.dword[i], ROUND) + D.dword[i] + S2.dword[0] + (W.dword[i] ^ W.dword[i+4]) - T2.dword[0] := GG(E.dword[i], F.dword[i], G.dword[i], ROUND) + H.dword[i] + S1.dword[0] + W.dword[i] - D.dword[i+1] := C.dword[i] - C.dword[i+1] := ROL32(B.dword[i], 9) - B.dword[i+1] := A.dword[i] - A.dword[i+1] := T1.dword[0] - H.dword[i+1] := G.dword[i] - G.dword[i+1] := ROL32(F.dword[i], 19) - F.dword[i+1] := E.dword[i] - E.dword[i+1] := P0(T2.dword[0]) - CONST.dword[0] := ROL32(CONST.dword[0], 1) -ENDFOR -dst.dword[3] := A.dword[2] -dst.dword[2] := B.dword[2] -dst.dword[1] := E.dword[2] -dst.dword[0] := F.dword[2] - - - - - - SM3 - AVX -
immintrin.h
- Cryptography -
- - - This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in "dst". - - -BYTE sbox[256] = { -0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, -0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, -0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, -0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, -0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, -0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, -0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, -0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, -0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, -0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, -0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, -0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, -0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, -0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, -0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, -0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 -} -DEFINE ROL32(dword, n) { - count := n % 32 - dest := (dword << count) | (dword >> (32-count)) - RETURN dest -} -DEFINE SBOX_BYTE(dword, i) { - RETURN sbox[dword.byte[i]] -} -DEFINE lower_t(dword) { - tmp.byte[0] := SBOX_BYTE(dword, 0) - tmp.byte[1] := SBOX_BYTE(dword, 1) - tmp.byte[2] := SBOX_BYTE(dword, 2) - tmp.byte[3] := SBOX_BYTE(dword, 3) - RETURN tmp -} -DEFINE L_KEY(dword) { - RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) -} -DEFINE T_KEY(dword) { - RETURN L_KEY(lower_t(dword)) -} -DEFINE F_KEY(X0, X1, X2, X3, round_key) { - RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) -} -FOR i:= 0 to 1 - P.dword[0] := __A.dword[4*i] - P.dword[1] := __A.dword[4*i+1] - P.dword[2] := __A.dword[4*i+2] - P.dword[3] := __A.dword[4*i+3] - C.dword[0] := F_KEY(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[4*i]) - C.dword[1] := F_KEY(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[4*i+1]) - C.dword[2] := F_KEY(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[4*i+2]) - C.dword[3] := F_KEY(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[4*i+3]) - dst.dword[4*i] := C.dword[0] - dst.dword[4*i+1] := C.dword[1] - dst.dword[4*i+2] := C.dword[2] - dst.dword[4*i+3] := C.dword[3] -ENDFOR -dst[MAX:256] := 0 - - - - SM4 - AVX -
immintrin.h
- Cryptography -
- - - This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in "dst". - - BYTE sbox[256] = { -0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, -0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, -0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, -0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, -0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, -0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, -0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, -0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, -0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, -0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, -0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, -0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, -0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, -0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, -0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, -0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 -} -DEFINE ROL32(dword, n) { - count := n % 32 - dest := (dword << count) | (dword >> (32-count)) - RETURN dest -} -DEFINE SBOX_BYTE(dword, i) { - RETURN sbox[dword.byte[i]] -} -DEFINE lower_t(dword) { - tmp.byte[0] := SBOX_BYTE(dword, 0) - tmp.byte[1] := SBOX_BYTE(dword, 1) - tmp.byte[2] := SBOX_BYTE(dword, 2) - tmp.byte[3] := SBOX_BYTE(dword, 3) - RETURN tmp -} -DEFINE L_RND(dword) { - tmp := dword - tmp := tmp ^ ROL32(dword, 2) - tmp := tmp ^ ROL32(dword, 10) - tmp := tmp ^ ROL32(dword, 18) - tmp := tmp ^ ROL32(dword, 24) - RETURN tmp -} -DEFINE T_RND(dword) { - RETURN L_RND(lower_t(dword)) -} -DEFINE F_RND(X0, X1, X2, X3, round_key) { - RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key) -} -FOR i:= 0 to 1 - P.dword[0] := __A.dword[4*i] - P.dword[1] := __A.dword[4*i+1] - P.dword[2] := __A.dword[4*i+2] - P.dword[3] := __A.dword[4*i+3] - C.dword[0] := F_RND(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[4*i]) - C.dword[1] := F_RND(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[4*i+1]) - C.dword[2] := F_RND(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[4*i+2]) - C.dword[3] := F_RND(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[4*i+3]) - dst.dword[4*i] := C.dword[0] - dst.dword[4*i+1] := C.dword[1] - dst.dword[4*i+2] := C.dword[2] - dst.dword[4*i+3] := C.dword[3] -ENDFOR -dst[MAX:256] := 0 - - - - SM4 - AVX -
immintrin.h
- Cryptography -
- - - This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in "dst". - - -BYTE sbox[256] = { -0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, -0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, -0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, -0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, -0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, -0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, -0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, -0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, -0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, -0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, -0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, -0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, -0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, -0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, -0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, -0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 -} -DEFINE ROL32(dword, n) { - count := n % 32 - dest := (dword << count) | (dword >> (32-count)) - RETURN dest -} -DEFINE SBOX_BYTE(dword, i) { - RETURN sbox[dword.byte[i]] -} -DEFINE lower_t(dword) { - tmp.byte[0] := SBOX_BYTE(dword, 0) - tmp.byte[1] := SBOX_BYTE(dword, 1) - tmp.byte[2] := SBOX_BYTE(dword, 2) - tmp.byte[3] := SBOX_BYTE(dword, 3) - RETURN tmp -} -DEFINE L_KEY(dword) { - RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) -} -DEFINE T_KEY(dword) { - RETURN L_KEY(lower_t(dword)) -} -DEFINE F_KEY(X0, X1, X2, X3, round_key) { - RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) -} -P.dword[0] := __A.dword[0] -P.dword[1] := __A.dword[1] -P.dword[2] := __A.dword[2] -P.dword[3] := __A.dword[3] -C.dword[0] := F_KEY(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[0]) -C.dword[1] := F_KEY(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[1]) -C.dword[2] := F_KEY(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[2]) -C.dword[3] := F_KEY(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[3]) -dst.dword[0] := C.dword[0] -dst.dword[1] := C.dword[1] -dst.dword[2] := C.dword[2] -dst.dword[3] := C.dword[3] -dst[MAX:128] := 0 - - - - SM4 - AVX -
immintrin.h
- Cryptography -
- - - This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in "dst". - - -BYTE sbox[256] = { -0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, -0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, -0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, -0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, -0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, -0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, -0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, -0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, -0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, -0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, -0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, -0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, -0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, -0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, -0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, -0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 -} -DEFINE ROL32(dword, n) { - count := n % 32 - dest := (dword << count) | (dword >> (32-count)) - RETURN dest -} -DEFINE SBOX_BYTE(dword, i) { - RETURN sbox[dword.byte[i]] -} -DEFINE lower_t(dword) { - tmp.byte[0] := SBOX_BYTE(dword, 0) - tmp.byte[1] := SBOX_BYTE(dword, 1) - tmp.byte[2] := SBOX_BYTE(dword, 2) - tmp.byte[3] := SBOX_BYTE(dword, 3) - RETURN tmp -} -DEFINE L_RND(dword) { - tmp := dword - tmp := tmp ^ ROL32(dword, 2) - tmp := tmp ^ ROL32(dword, 10) - tmp := tmp ^ ROL32(dword, 18) - tmp := tmp ^ ROL32(dword, 24) - RETURN tmp -} -DEFINE T_RND(dword) { - RETURN L_RND(lower_t(dword)) -} -DEFINE F_RND(X0, X1, X2, X3, round_key) { - RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key) -} -P.dword[0] := __A.dword[0] -P.dword[1] := __A.dword[1] -P.dword[2] := __A.dword[2] -P.dword[3] := __A.dword[3] -C.dword[0] := F_RND(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[0]) -C.dword[1] := F_RND(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[1]) -C.dword[2] := F_RND(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[2]) -C.dword[3] := F_RND(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[3]) -dst.dword[0] := C.dword[0] -dst.dword[1] := C.dword[1] -dst.dword[2] := C.dword[2] -dst.dword[3] := C.dword[3] -dst[MAX:128] := 0 - - - - SM4 - AVX -
immintrin.h
- Cryptography -
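A usage sketch for the four-round encryption form on one 128-bit lane. The intrinsic name is not shown in this dump; _mm_sm4rnds4_epi32 is assumed here (recent GCC and Clang expose it behind -msm4). SM4 runs 32 rounds, so one block takes eight calls; the final word reversal required by the SM4 standard is left to the caller.

#include <immintrin.h>

/* Run the 32 SM4 rounds as eight four-round steps; rk holds the 32 expanded
   round keys, four per vector, in round order. Compile with -msm4. */
__m128i sm4_encrypt_rounds(__m128i state, const __m128i rk[8])
{
    for (int i = 0; i < 8; i++)
        state = _mm_sm4rnds4_epi32(state, rk[i]);
    return state; /* the standard then outputs the four words reversed */
}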
- - - - Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ACOS(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ACOS(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ACOSH(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ACOSH(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ASIN(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ASIN(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ASINH(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ASINH(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ATAN(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ATAN(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - - Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - - Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians. - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
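These trigonometry entries are SVML short-vector math functions, i.e. compiler-provided library calls rather than single instructions. A sketch of the atan2 form, assuming the conventional SVML name _mm_atan2_ps (available with compilers that ship SVML, such as ICX, and recent MSVC):

#include <immintrin.h>

/* Element-wise atan2(a[i], b[i]) in radians; per the description above,
   "a" supplies the numerator (y) and "b" the denominator (x). */
__m128 angles4(__m128 a, __m128 b)
{
    return _mm_atan2_ps(a, b);
}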
- - - - Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ATANH(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ATANH(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := COS(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := COS(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := COSD(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := COSD(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := COSH(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := COSH(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - - Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0)) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - - Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0)) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
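The hypot entries use the literal formula SQRT(a^2 + b^2). Worth noting: unlike libm's hypotf, this naive form can overflow for large inputs whose true hypotenuse is still representable, as the scalar comparison below illustrates.

#include <math.h>
#include <stdio.h>

/* The guide's formula, taken literally. */
static float hypot_naive(float a, float b) { return sqrtf(a * a + b * b); }

int main(void)
{
    /* (3e30)^2 overflows float, so the naive form yields inf;
       hypotf returns the representable answer 5e30. */
    printf("%g vs %g\n", hypot_naive(3e30f, 4e30f), hypotf(3e30f, 4e30f));
    return 0;
}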
- - - - Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := SIN(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := SIN(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - - Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := SIN(a[i+63:i]) - MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - - Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := SIN(a[i+31:i]) - MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
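One call produces both results, with the sine in the return value and the cosine stored through the pointer, exactly as described above. A sketch assuming the SVML name _mm_sincos_ps:

#include <immintrin.h>

/* Compute sin and cos of four angles (radians) in one call. */
void sincos4(const float in[4], float s[4], float c[4])
{
    __m128 cv;
    __m128 sv = _mm_sincos_ps(&cv, _mm_loadu_ps(in));
    _mm_storeu_ps(s, sv);
    _mm_storeu_ps(c, cv);
}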
- - - - Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := SIND(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := SIND(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := SINH(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := SINH(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := TAN(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := TAN(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := TAND(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := TAND(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := TANH(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := TANH(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Trigonometry -
- - - - Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := CubeRoot(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := CubeRoot(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of "e" raised to the power of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". - -DEFINE CEXP(a[31:0], b[31:0]) { - result[31:0] := POW(FP32(e), a[31:0]) * COS(b[31:0]) - result[63:32] := POW(FP32(e), a[31:0]) * SIN(b[31:0]) - RETURN result -} -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := CEXP(a[i+31:i], a[i+63:i+32]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the natural logarithm of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". - -DEFINE CLOG(a[31:0], b[31:0]) { - result[31:0] := LOG(SQRT(POW(a, 2.0) + POW(b, 2.0))) - result[63:32] := ATAN2(b, a) - RETURN result -} -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := CLOG(a[i+31:i], a[i+63:i+32]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]". - -DEFINE CSQRT(a[31:0], b[31:0]) { - sign[31:0] := (b < 0.0) ? -FP32(1.0) : FP32(1.0) - result[31:0] := SQRT((a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0) - result[63:32] := sign * SQRT((-a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0) - RETURN result -} -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := CSQRT(a[i+31:i], a[i+63:i+32]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
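A scalar C model of the CSQRT pseudocode above, useful for unit-testing: it returns the principal square root of a + b*i, with the sign of the imaginary part taken from b.

#include <math.h>

/* out[0] + out[1]*i = sqrt(a + b*i), per the CSQRT definition above. */
static void csqrt_model(float a, float b, float out[2])
{
    float r = sqrtf(a * a + b * b);          /* |a + b*i| */
    float sign = (b < 0.0f) ? -1.0f : 1.0f;
    out[0] = sqrtf((a + r) / 2.0f);
    out[1] = sign * sqrtf((r - a) / 2.0f);
}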
- - - - Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := POW(e, a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := POW(FP32(e), a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := POW(10.0, a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := POW(FP32(10.0), a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := POW(2.0, a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := POW(FP32(2.0), a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := POW(e, a[i+63:i]) - 1.0 -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0 -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the inverse cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := InvCubeRoot(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the inverse cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := InvCubeRoot(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := InvSQRT(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := InvSQRT(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := LOG(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := LOG(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := LOG(1.0 + a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := LOG(1.0 + a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ConvertExpFP64(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element. - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ConvertExpFP32(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
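Since these exponent-extraction entries "essentially calculate floor(log2(x))", the scalar equivalent is libm's logb, which returns the unbiased exponent as a floating-point number:

#include <math.h>

/* Scalar model of the exponent-extraction entries above: for finite
   nonzero x, logb(x) == floor(log2(|x|)), e.g. logb(8.5) == 3.0. */
static double exponent_of(double x) { return logb(x); }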
- - - - - Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := POW(a[i+63:i], b[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - - Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := POW(a[i+31:i], b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_pd". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := SQRT(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_ps". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := SQRT(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Elementary Math Functions -
- - - - Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := CDFNormal(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
- - - - Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := CDFNormal(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := InverseCDFNormal(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := InverseCDFNormal(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
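The four normal-distribution entries above leave CDFNormal and InverseCDFNormal abstract. For a scalar reference, the standard identity Phi(x) = 0.5 * erfc(-x / sqrt(2)) ties the normal CDF to the error-function entries below; the identity is supplied here, not taken from the guide.

#include <math.h>

/* Scalar model of CDFNormal via erfc; the constant is 1/sqrt(2). */
static double cdfnorm_model(double x)
{
    return 0.5 * erfc(-x * 0.70710678118654752440);
}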
- - - - Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ERF(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
- - - - Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := 1.0 - ERF(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
- - - - Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := 1.0 - ERF(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i])) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i])) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := 1.0 / ERF(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
- - - - Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := 1.0 / ERF(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Probability/Statistics -
- - - - - Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 15 - i := 8*j - IF b[i+7:i] == 0 - #DE - FI - dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 7 - i := 16*j - IF b[i+15:i] == 0 - #DE - FI - dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 3 - i := 32*j - IF b[i+31:i] == 0 - #DE - FI - dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 1 - i := 64*j - IF b[i+63:i] == 0 - #DE - FI - dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 15 - i := 8*j - IF b[i+7:i] == 0 - #DE - FI - dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 7 - i := 16*j - IF b[i+15:i] == 0 - #DE - FI - dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 3 - i := 32*j - IF b[i+31:i] == 0 - #DE - FI - dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - -FOR j := 0 to 1 - i := 64*j - IF b[i+63:i] == 0 - #DE - FI - dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
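A usage sketch of the truncating element-wise division above, assuming the SVML name _mm_div_epi32. Per the pseudocode, a zero lane in the divisor raises #DE (the divide-error fault), so callers must screen divisors first.

#include <immintrin.h>

/* Truncated 32-bit element-wise division; b must have no zero lanes,
   otherwise the #DE fault described in the pseudocode fires. */
__m128i div4_trunc(__m128i a, __m128i b)
{
    return _mm_div_epi32(a, b);
}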
- - - - Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ERF(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - - Divide packed 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed 32-bit integers into memory at "mem_addr". - FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) - MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". - FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 8-bit integers in "dst". - FOR j := 0 to 15 - i := 8*j - dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 16-bit integers in "dst". - FOR j := 0 to 7 - i := 16*j - dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst". - FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 64-bit integers in "dst". - FOR j := 0 to 1 - i := 64*j - dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 8-bit integers in "dst". - FOR j := 0 to 15 - i := 8*j - dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 16-bit integers in "dst". - FOR j := 0 to 7 - i := 16*j - dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". - FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 64-bit integers in "dst". - FOR j := 0 to 1 - i := 64*j - dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst". - FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed unsigned 32-bit integers into memory at "mem_addr". - FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i]) - MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - - Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst". - FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Arithmetic -
- - - - Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := CEIL(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Special Math Functions -
- - - - Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := CEIL(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Special Math Functions -
- - - - Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := FLOOR(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Special Math Functions -
- - - - Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := FLOOR(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Special Math Functions -
- - - - Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ROUND(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Special Math Functions -
- - - - Round the packed single-precision (32-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ROUND(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Special Math Functions -
- - - - Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction. - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := TRUNCATE(a[i+63:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Miscellaneous -
- - - - Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction. - FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := TRUNCATE(a[i+31:i]) -ENDFOR -dst[MAX:128] := 0 - - SSE -
immintrin.h
- Miscellaneous -
- - - - - - - - - Macro: Transpose the 4x4 matrix formed by the 4 rows of single-precision (32-bit) floating-point elements in "row0", "row1", "row2", and "row3", and store the transposed matrix in these vectors ("row0" now contains column 0, etc.). - -__m128 tmp3, tmp2, tmp1, tmp0; -tmp0 := _mm_unpacklo_ps(row0, row1); -tmp2 := _mm_unpacklo_ps(row2, row3); -tmp1 := _mm_unpackhi_ps(row0, row1); -tmp3 := _mm_unpackhi_ps(row2, row3); -row0 := _mm_movelh_ps(tmp0, tmp2); -row1 := _mm_movehl_ps(tmp2, tmp0); -row2 := _mm_movelh_ps(tmp1, tmp3); -row3 := _mm_movehl_ps(tmp3, tmp1); - - SSE -
xmmintrin.h
- Swizzle -
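The macro expansion above is declared in xmmintrin.h as _MM_TRANSPOSE4_PS. A typical in-place use on a row-major 4x4 matrix:

#include <xmmintrin.h>

void transpose4x4(float m[16])
{
    __m128 r0 = _mm_loadu_ps(m + 0);
    __m128 r1 = _mm_loadu_ps(m + 4);
    __m128 r2 = _mm_loadu_ps(m + 8);
    __m128 r3 = _mm_loadu_ps(m + 12);
    _MM_TRANSPOSE4_PS(r0, r1, r2, r3);   /* rows become columns */
    _mm_storeu_ps(m + 0, r0);
    _mm_storeu_ps(m + 4, r1);
    _mm_storeu_ps(m + 8, r2);
    _mm_storeu_ps(m + 12, r3);
}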
- - - - - Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". - -dst[15:0] := (a[63:0] >> (imm8[1:0] * 16))[15:0] -dst[31:16] := 0 - - - SSE -
xmmintrin.h
- Swizzle -
- - - - - Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". - -dst[15:0] := (a[63:0] >> (imm8[1:0] * 16))[15:0] -dst[31:16] := 0 - - - SSE -
xmmintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8". - -dst[63:0] := a[63:0] -sel := imm8[1:0]*16 -dst[sel+15:sel] := i[15:0] - - - SSE -
xmmintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8". - -dst[63:0] := a[63:0] -sel := imm8[1:0]*16 -dst[sel+15:sel] := i[15:0] - - - SSE -
xmmintrin.h
- Swizzle -
- - - - - Shuffle 16-bit integers in "a" using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[15:0] := src[15:0] - 1: tmp[15:0] := src[31:16] - 2: tmp[15:0] := src[47:32] - 3: tmp[15:0] := src[63:48] - ESAC - RETURN tmp[15:0] -} -dst[15:0] := SELECT4(a[63:0], imm8[1:0]) -dst[31:16] := SELECT4(a[63:0], imm8[3:2]) -dst[47:32] := SELECT4(a[63:0], imm8[5:4]) -dst[63:48] := SELECT4(a[63:0], imm8[7:6]) - - - SSE -
xmmintrin.h
- Swizzle -
- - - - - Shuffle 16-bit integers in "a" using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[15:0] := src[15:0] - 1: tmp[15:0] := src[31:16] - 2: tmp[15:0] := src[47:32] - 3: tmp[15:0] := src[63:48] - ESAC - RETURN tmp[15:0] -} -dst[15:0] := SELECT4(a[63:0], imm8[1:0]) -dst[31:16] := SELECT4(a[63:0], imm8[3:2]) -dst[47:32] := SELECT4(a[63:0], imm8[5:4]) -dst[63:48] := SELECT4(a[63:0], imm8[7:6]) - - - SSE -
xmmintrin.h
- Swizzle -
- - - - - - Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -dst[95:64] := SELECT4(b[127:0], imm8[5:4]) -dst[127:96] := SELECT4(b[127:0], imm8[7:6]) - - - SSE -
xmmintrin.h
- Swizzle -
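Per the SELECT4 pseudocode above, the low two result lanes come from "a" and the high two from "b", so passing the same vector twice gives an arbitrary permutation of one register. The _MM_SHUFFLE macro from xmmintrin.h packs the four 2-bit selectors:

#include <xmmintrin.h>

/* Broadcast lane 2 of v to all four lanes: each 2-bit field selects 2. */
__m128 broadcast_lane2(__m128 v)
{
    return _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
}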
- - - - - Unpack and interleave single-precision (32-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) - - - SSE -
xmmintrin.h
- Swizzle -
- - - - - Unpack and interleave single-precision (32-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[31:0] - dst[63:32] := src2[31:0] - dst[95:64] := src1[63:32] - dst[127:96] := src2[63:32] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) - - - SSE -
xmmintrin.h
- Swizzle -
- - - - Get the unsigned 32-bit value of the MXCSR control and status register. - dst[31:0] := MXCSR - - - SSE -
immintrin.h
- General Support -
- - - - Set the MXCSR control and status register with the value in unsigned 32-bit integer "a". - -MXCSR := a[31:0] - - - SSE -
immintrin.h
- General Support -
- - - Macro: Get the exception state bits from the MXCSR control and status register. The exception state may contain any of the following flags: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT - dst[31:0] := MXCSR & _MM_EXCEPT_MASK - - SSE -
immintrin.h
- General Support -
- - - - Macro: Set the exception state bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The exception state may contain any of the following flags: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT - MXCSR := (MXCSR AND ~_MM_EXCEPT_MASK) OR a[31:0] - - SSE -
immintrin.h
- General Support -
- - - Macro: Get the exception mask bits from the MXCSR control and status register. The exception mask may contain any of the following flags: _MM_MASK_INVALID, _MM_MASK_DIV_ZERO, _MM_MASK_DENORM, _MM_MASK_OVERFLOW, _MM_MASK_UNDERFLOW, _MM_MASK_INEXACT - dst[31:0] := MXCSR & _MM_MASK_MASK - - SSE -
immintrin.h
- General Support -
- - - - Macro: Set the exception mask bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The exception mask may contain any of the following flags: _MM_MASK_INVALID, _MM_MASK_DIV_ZERO, _MM_MASK_DENORM, _MM_MASK_OVERFLOW, _MM_MASK_UNDERFLOW, _MM_MASK_INEXACT - MXCSR := (MXCSR AND ~_MM_MASK_MASK) OR a[31:0] - - SSE -
immintrin.h
- General Support -
- - - Macro: Get the rounding mode bits from the MXCSR control and status register. The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO - dst[31:0] := MXCSR & _MM_ROUND_MASK - - SSE -
immintrin.h
- General Support -
- - - - Macro: Set the rounding mode bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO - MXCSR := (MXCSR AND ~_MM_ROUND_MASK) OR a[31:0] - - SSE -
immintrin.h
- General Support -
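A sketch of scoped use of the rounding-mode macros above; the old MXCSR field is saved and restored so the change does not leak:

#include <xmmintrin.h>

void with_round_up(void)
{
    unsigned old = _MM_GET_ROUNDING_MODE();
    _MM_SET_ROUNDING_MODE(_MM_ROUND_UP);
    /* ... SSE arithmetic here rounds toward +infinity ... */
    _MM_SET_ROUNDING_MODE(old);
}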
- - - Macro: Get the flush zero bits from the MXCSR control and status register. The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF - dst[31:0] := MXCSR & _MM_FLUSH_MASK - - SSE -
immintrin.h
- General Support -
- - - - Macro: Set the flush zero bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF - MXCSR := (MXCSR AND ~_MM_FLUSH_MASK) OR a[31:0] - - SSE -
immintrin.h
- General Support -
- - - - - Fetch the line of data from memory that contains address "p" to a location in the cache hierarchy specified by the locality hint "i", which can be one of:<ul> - <li>_MM_HINT_T0 // 3, move data using the T0 hint. The PREFETCHT0 instruction will be generated.</li> - <li>_MM_HINT_T1 // 2, move data using the T1 hint. The PREFETCHT1 instruction will be generated.</li> - <li>_MM_HINT_T2 // 1, move data using the T2 hint. The PREFETCHT2 instruction will be generated.</li> - <li>_MM_HINT_NTA // 0, move data using the non-temporal access (NTA) hint. The PREFETCHNTA instruction will be generated.</li></ul> - - - - - - SSE -
immintrin.h
- General Support -
- - - - Perform a serializing operation on all store-to-memory instructions that were issued prior to this instruction. Guarantees that every store instruction that precedes, in program order, is globally visible before any store instruction which follows the fence in program order. - - SSE -
immintrin.h
- General Support -
- - - - - Allocate "size" bytes of memory, aligned to the alignment specified in "align", and return a pointer to the allocated memory. "_mm_free" should be used to free memory that is allocated with "_mm_malloc". - SSE -
immintrin.h
- General Support -
- - - - Free aligned memory that was allocated with "_mm_malloc". - SSE -
immintrin.h
- General Support -
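`_mm_malloc` and `_mm_free` are named in the source itself, so this pairing is not inferred. A short C sketch; the buffer size and alignment are illustrative:

    #include <immintrin.h>

    int main(void) {
        /* 16-byte alignment satisfies the aligned SSE load/store intrinsics */
        float *buf = _mm_malloc(64 * sizeof(float), 16);
        if (!buf) return 1;
        /* ... use buf with aligned loads/stores ... */
        _mm_free(buf);   /* must pair with _mm_malloc, not free() */
        return 0;
    }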
SSE (immintrin.h), General Support (continued):

* Return vector of type __m128 with undefined elements.
SSE (xmmintrin.h), Special Math Functions:

* Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst".
    FOR j := 0 to 3: i := j*16; dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]); ENDFOR
* (Duplicate of the previous entry in the source.)
* Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst".
    FOR j := 0 to 7: i := j*8; dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]); ENDFOR
* (Duplicate of the previous entry in the source.)
* Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst".
    FOR j := 0 to 3: i := j*16; dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]); ENDFOR
* (Duplicate of the previous entry in the source.)
* Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst".
    FOR j := 0 to 7: i := j*8; dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]); ENDFOR
* (Duplicate of the previous entry in the source.)
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [min_float_note]
    dst[31:0] := MIN(a[31:0], b[31:0]); dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note]
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]); ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [max_float_note]
    dst[31:0] := MAX(a[31:0], b[31:0]); dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note]
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]); ENDFOR

(A C sketch of a clamp built from these follows.)
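The packed float MIN/MAX entries compose into a per-lane clamp. A minimal sketch, assuming the entries correspond to the standard `_mm_min_ps`/`_mm_max_ps` (names are not preserved in this extract); note the [min_float_note]/[max_float_note] placeholders above mark the guide's caveat that these operations are not fully IEEE-conformant when a lane is NaN:

    #include <xmmintrin.h>

    /* Clamp each 32-bit float lane of x into [lo, hi]. */
    static __m128 clamp_ps(__m128 x, __m128 lo, __m128 hi) {
        return _mm_min_ps(_mm_max_ps(x, lo), hi);
    }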
SSE (xmmintrin.h), Arithmetic:

* Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".
    FOR j := 0 to 3: i := j*16; tmp[31:0] := a[i+15:i] * b[i+15:i]; dst[i+15:i] := tmp[31:16]; ENDFOR
* (Duplicate of the previous entry in the source.)
* Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce four unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of "dst". (Categories: Arithmetic, Miscellaneous.)
    FOR j := 0 to 7: i := j*8; tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]); ENDFOR
    dst[15:0] := tmp[7:0] + tmp[15:8] + tmp[23:16] + tmp[31:24] + tmp[39:32] + tmp[47:40] + tmp[55:48] + tmp[63:56]
    dst[63:16] := 0
* (Duplicate of the previous entry in the source.)
* Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := a[31:0] + b[31:0]; dst[127:32] := a[127:32]
* Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := a[i+31:i] + b[i+31:i]; ENDFOR
* Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := a[31:0] - b[31:0]; dst[127:32] := a[127:32]
* Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := a[i+31:i] - b[i+31:i]; ENDFOR
* Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := a[31:0] * b[31:0]; dst[127:32] := a[127:32]
* Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := a[i+31:i] * b[i+31:i]; ENDFOR
* Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := a[31:0] / b[31:0]; dst[127:32] := a[127:32]
* Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".
    FOR j := 0 to 3: i := 32*j; dst[i+31:i] := a[i+31:i] / b[i+31:i]; ENDFOR

(A C sketch combining two of these follows.)
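The packed arithmetic operations chain naturally, one result feeding the next. A minimal sketch, assuming the usual `_mm_add_ps`/`_mm_mul_ps` spellings (names are stripped in this extract):

    #include <xmmintrin.h>

    /* Evaluate a*x + b across four float lanes at once (two separate
       operations; plain SSE has no fused multiply-add). */
    static __m128 axpb_ps(__m128 a, __m128 x, __m128 b) {
        return _mm_add_ps(_mm_mul_ps(a, x), b);
    }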
SSE (xmmintrin.h), Probability/Statistics:

* Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst".
    FOR j := 0 to 7: i := j*8; dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1; ENDFOR
* (Duplicate of the previous entry in the source.)
* Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst".
    FOR j := 0 to 3: i := j*16; dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1; ENDFOR
* (Duplicate of the previous entry in the source.)
SSE (xmmintrin.h), Convert:

* Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := Convert_Int32_To_FP32(b[31:0]); dst[127:32] := a[127:32]
* (Duplicate of the previous entry in the source.)
* Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := Convert_Int64_To_FP32(b[63:0]); dst[127:32] := a[127:32]; dst[MAX:128] := 0
* Convert packed 32-bit integers in "b" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", and copy the upper 2 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := Convert_Int32_To_FP32(b[31:0]); dst[63:32] := Convert_Int32_To_FP32(b[63:32]); dst[95:64] := a[95:64]; dst[127:96] := a[127:96]
* Convert packed signed 32-bit integers in "b" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", and copy the upper 2 packed elements from "a" to the upper elements of "dst".
    (Same operation as the previous entry.)
* Convert packed 16-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".
    FOR j := 0 to 3: i := j*16; m := j*32; dst[m+31:m] := Convert_Int16_To_FP32(a[i+15:i]); ENDFOR
* Convert packed unsigned 16-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".
    (Same operation as the previous entry in the source.)
* Convert the lower packed 8-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".
    FOR j := 0 to 3: i := j*8; m := j*32; dst[m+31:m] := Convert_Int8_To_FP32(a[i+7:i]); ENDFOR
* Convert the lower packed unsigned 8-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".
    (Same operation as the previous entry in the source.)
* Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", then convert the packed signed 32-bit integers in "b" to single-precision (32-bit) floating-point elements, and store the results in the upper 2 elements of "dst".
    dst[31:0] := Convert_Int32_To_FP32(a[31:0]); dst[63:32] := Convert_Int32_To_FP32(a[63:32]); dst[95:64] := Convert_Int32_To_FP32(b[31:0]); dst[127:96] := Convert_Int32_To_FP32(b[63:32])
* Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst".
    dst[31:0] := Convert_FP32_To_Int32(a[31:0])
* (Duplicate of the previous entry in the source.)
* Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst".
    dst[63:0] := Convert_FP32_To_Int64(a[31:0])
* Copy the lower single-precision (32-bit) floating-point element of "a" to "dst".
    dst[31:0] := a[31:0]
* Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".
    FOR j := 0 to 1: i := 32*j; dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]); ENDFOR
* (Duplicate of the previous entry in the source.)
* Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst".
    dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0])
* (Duplicate of the previous entry in the source.)
* Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst".
    dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0])
* Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".
    FOR j := 0 to 1: i := 32*j; dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]); ENDFOR
* (Duplicate of the previous entry in the source.)
* Convert packed single-precision (32-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". Note: this intrinsic will generate 0x7FFF, rather than 0x8000, for input values between 0x7FFF and 0x7FFFFFFF.
    FOR j := 0 to 3
        i := 16*j; k := 32*j
        IF a[k+31:k] >= FP32(0x7FFF) && a[k+31:k] <= FP32(0x7FFFFFFF) THEN dst[i+15:i] := 0x7FFF ELSE dst[i+15:i] := Convert_FP32_To_Int16(a[k+31:k]) FI
    ENDFOR
* Convert packed single-precision (32-bit) floating-point elements in "a" to packed 8-bit integers, and store the results in the lower 4 elements of "dst". Note: this intrinsic will generate 0x7F, rather than 0x80, for input values between 0x7F and 0x7FFFFFFF.
    FOR j := 0 to 3
        i := 8*j; k := 32*j
        IF a[k+31:k] >= FP32(0x7F) && a[k+31:k] <= FP32(0x7FFFFFFF) THEN dst[i+7:i] := 0x7F ELSE dst[i+7:i] := Convert_FP32_To_Int8(a[k+31:k]) FI
    ENDFOR

(A C sketch contrasting the rounding and truncating conversions follows.)
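The plain and "_Truncate" conversion pairs differ only in how they resolve fractional values: the former uses the current MXCSR rounding mode, the latter always chops toward zero. A minimal C sketch, assuming the standard `_mm_cvtss_si32`/`_mm_cvttss_si32` names (not preserved in this extract):

    #include <xmmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 v = _mm_set_ss(1.5f);
        printf("%d\n", _mm_cvtss_si32(v));   /* 2: round-to-nearest-even (MXCSR default) */
        printf("%d\n", _mm_cvttss_si32(v));  /* 1: truncation toward zero */
        v = _mm_set_ss(-1.5f);
        printf("%d\n", _mm_cvtss_si32(v));   /* -2 under round-to-nearest-even */
        printf("%d\n", _mm_cvttss_si32(v));  /* -1: truncation toward zero */
        return 0;
    }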
SSE (immintrin.h), Store:

* Store 64-bits of integer data from "a" into memory using a non-temporal memory hint.
    MEM[mem_addr+63:mem_addr] := a[63:0]
* Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint.
    FOR j := 0 to 7: i := j*8; IF mask[i+7] MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] FI; ENDFOR
* Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element).
    (Same operation as the previous entry.)
* Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    MEM[mem_addr+127:mem_addr] := a[127:0]
* Store the upper 2 single-precision (32-bit) floating-point elements from "a" into memory.
    MEM[mem_addr+31:mem_addr] := a[95:64]; MEM[mem_addr+63:mem_addr+32] := a[127:96]
* Store the lower 2 single-precision (32-bit) floating-point elements from "a" into memory.
    MEM[mem_addr+31:mem_addr] := a[31:0]; MEM[mem_addr+63:mem_addr+32] := a[63:32]
* Store the lower single-precision (32-bit) floating-point element from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.
    MEM[mem_addr+31:mem_addr] := a[31:0]
* Store the lower single-precision (32-bit) floating-point element from "a" into 4 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    MEM[mem_addr+31:mem_addr] := a[31:0]; MEM[mem_addr+63:mem_addr+32] := a[31:0]; MEM[mem_addr+95:mem_addr+64] := a[31:0]; MEM[mem_addr+127:mem_addr+96] := a[31:0]
* (Duplicate of the previous entry in the source.)
* Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    MEM[mem_addr+127:mem_addr] := a[127:0]
* Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.
    MEM[mem_addr+127:mem_addr] := a[127:0]
* Store 4 single-precision (32-bit) floating-point elements from "a" into memory in reverse order. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    MEM[mem_addr+31:mem_addr] := a[127:96]; MEM[mem_addr+63:mem_addr+32] := a[95:64]; MEM[mem_addr+95:mem_addr+64] := a[63:32]; MEM[mem_addr+127:mem_addr+96] := a[31:0]

(A C sketch of the aligned/unaligned store distinction follows.)
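The aligned and unaligned 128-bit stores have identical operation pseudocode; only the alignment contract differs. A minimal sketch, assuming the usual `_mm_store_ps`/`_mm_storeu_ps` names (not preserved here):

    #include <xmmintrin.h>
    #include <stdint.h>

    void write4(float *p, __m128 v) {
        /* The aligned form requires a 16-byte aligned address or it may
           raise a general-protection fault; the unaligned form always works. */
        if (((uintptr_t)p & 15) == 0)
            _mm_store_ps(p, v);
        else
            _mm_storeu_ps(p, v);
    }

In practice one simply uses the unaligned form unless alignment is guaranteed by construction; on modern cores it costs nothing extra when the address happens to be aligned.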
SSE (xmmintrin.h), Miscellaneous:

* Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst".
    FOR j := 0 to 7: i := j*8; dst[j] := a[i+7]; ENDFOR
    dst[MAX:8] := 0
* (Duplicate of the previous entry in the source.)
* Set each bit of mask "dst" based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in "a".
    FOR j := 0 to 3: i := j*32; IF a[i+31] dst[j] := 1 ELSE dst[j] := 0 FI; ENDFOR
    dst[MAX:4] := 0
SSE (xmmintrin.h), Elementary Math Functions:

* Compute the square root of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := SQRT(a[31:0]); dst[127:32] := a[127:32]
* Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := SQRT(a[i+31:i]); ENDFOR
* Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12.
    dst[31:0] := (1.0 / a[31:0]); dst[127:32] := a[127:32]
* Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12.
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := (1.0 / a[i+31:i]); ENDFOR
* Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12.
    dst[31:0] := (1.0 / SQRT(a[31:0])); dst[127:32] := a[127:32]
* Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12.
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := (1.0 / SQRT(a[i+31:i])); ENDFOR

(A C sketch of the usual accuracy refinement follows.)
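The roughly 12-bit accuracy of the reciprocal-square-root approximation is commonly tightened with one Newton-Raphson step, which approximately doubles the accurate bits. A sketch assuming the standard `_mm_rsqrt_ps` name (not preserved in this extract):

    #include <xmmintrin.h>

    /* Approximate 1/sqrt(x) per lane, refined by one Newton-Raphson step:
       y' = y * (1.5 - 0.5 * x * y * y). */
    static __m128 rsqrt_nr_ps(__m128 x) {
        __m128 y  = _mm_rsqrt_ps(x);
        __m128 yy = _mm_mul_ps(y, y);
        __m128 t  = _mm_sub_ps(_mm_set1_ps(1.5f),
                               _mm_mul_ps(_mm_set1_ps(0.5f), _mm_mul_ps(x, yy)));
        return _mm_mul_ps(y, t);
    }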
SSE (xmmintrin.h), Logical:

* Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := (a[i+31:i] AND b[i+31:i]); ENDFOR
* Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i]); ENDFOR
* Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := a[i+31:i] OR b[i+31:i]; ENDFOR
* Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := a[i+31:i] XOR b[i+31:i]; ENDFOR

(A C sketch of sign-bit manipulation with these follows.)
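The bitwise operations on float lanes exist mostly for sign-bit and mask tricks. A minimal sketch of per-lane absolute value, assuming the usual `_mm_andnot_ps` name (not preserved here):

    #include <xmmintrin.h>

    /* Clear the sign bit of every lane: |x| per lane. */
    static __m128 abs_ps(__m128 x) {
        const __m128 sign = _mm_set1_ps(-0.0f);  /* only bit 31 set in each lane */
        return _mm_andnot_ps(sign, x);           /* (NOT sign) AND x */
    }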
SSE (xmmintrin.h), Compare:

* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for equality, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := ( a[31:0] == b[31:0] ) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for less-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := ( a[31:0] < b[31:0] ) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := ( a[i+31:i] < b[i+31:i] ) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := ( a[31:0] <= b[31:0] ) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := ( a[i+31:i] <= b[i+31:i] ) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for greater-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := ( a[31:0] > b[31:0] ) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for greater-than, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for greater-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := ( a[31:0] >= b[31:0] ) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := ( a[i+31:i] >= b[i+31:i] ) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := ( a[31:0] != b[31:0] ) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := ( a[i+31:i] != b[i+31:i] ) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := (!( a[31:0] < b[31:0] )) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := (!( a[i+31:i] < b[i+31:i] )) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := (!( a[31:0] <= b[31:0] )) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := (!( a[i+31:i] <= b[i+31:i] )) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := (!( a[31:0] > b[31:0] )) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := (!( a[i+31:i] > b[i+31:i] )) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := (!( a[31:0] >= b[31:0] )) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := (!( a[i+31:i] >= b[i+31:i] )) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := ( a[31:0] != NaN AND b[31:0] != NaN ) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := ( a[i+31:i] != NaN AND b[i+31:i] != NaN ) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := ( a[31:0] == NaN OR b[31:0] == NaN ) ? 0xFFFFFFFF : 0; dst[127:32] := a[127:32]
* Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := ( a[i+31:i] == NaN OR b[i+31:i] == NaN ) ? 0xFFFFFFFF : 0; ENDFOR
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1).
    RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] == b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1).
    RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] < b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1).
    RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] <= b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1).
    RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] > b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1).
    RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] >= b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1).
    RETURN ( a[31:0] == NaN OR b[31:0] == NaN OR a[31:0] != b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
    RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] == b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
    RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] < b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
    RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] <= b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
    RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] > b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
    RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] >= b[31:0] ) ? 1 : 0
* Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
    RETURN ( a[31:0] == NaN OR b[31:0] == NaN OR a[31:0] != b[31:0] ) ? 1 : 0

(A C sketch using the all-ones/all-zeros compare masks follows.)
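The packed compares produce per-lane 0xFFFFFFFF/0 masks rather than booleans, which is what makes branchless selection possible; the movemask operation from the Miscellaneous group turns such a mask into a scalar. A minimal sketch, assuming the usual `_mm_cmplt_ps`/`_mm_movemask_ps` names (not preserved in this extract):

    #include <xmmintrin.h>

    /* Per-lane select: where a < b take x, else y. */
    static __m128 select_lt(__m128 a, __m128 b, __m128 x, __m128 y) {
        __m128 m = _mm_cmplt_ps(a, b);   /* 0xFFFFFFFF where a < b, else 0 */
        return _mm_or_ps(_mm_and_ps(m, x), _mm_andnot_ps(m, y));
    }

    /* Scalar early-out: does any lane compare less-than? */
    static int any_lt(__m128 a, __m128 b) {
        return _mm_movemask_ps(_mm_cmplt_ps(a, b)) != 0;
    }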
SSE (xmmintrin.h), Set:

* Copy single-precision (32-bit) floating-point element "a" to the lower element of "dst", and zero the upper 3 elements.
    dst[31:0] := a[31:0]; dst[127:32] := 0
* Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := a[31:0]; ENDFOR
* (Duplicate of the previous entry in the source.)
* Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values.
    dst[31:0] := e0; dst[63:32] := e1; dst[95:64] := e2; dst[127:96] := e3
* Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order.
    dst[31:0] := e3; dst[63:32] := e2; dst[95:64] := e1; dst[127:96] := e0
* Return vector of type __m128 with all elements set to zero.
    dst[MAX:0] := 0

(A C sketch of the two argument orders follows.)
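The two multi-element set operations differ only in argument order: one takes values highest lane first, the other lowest lane first. A minimal sketch, assuming the usual `_mm_set_ps`/`_mm_setr_ps` names (not preserved here):

    #include <xmmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 a = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);  /* args high-to-low */
        __m128 b = _mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f); /* args low-to-high */
        float out[4];
        _mm_storeu_ps(out, a);
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 1 2 3 */
        _mm_storeu_ps(out, b);
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 1 2 3 */
        return 0;
    }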
SSE (immintrin.h), Load:

* Load 2 single-precision (32-bit) floating-point elements from memory into the upper 2 elements of "dst", and copy the lower 2 elements from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary.
    dst[31:0] := a[31:0]; dst[63:32] := a[63:32]; dst[95:64] := MEM[mem_addr+31:mem_addr]; dst[127:96] := MEM[mem_addr+63:mem_addr+32]
* Load 2 single-precision (32-bit) floating-point elements from memory into the lower 2 elements of "dst", and copy the upper 2 elements from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary.
    dst[31:0] := MEM[mem_addr+31:mem_addr]; dst[63:32] := MEM[mem_addr+63:mem_addr+32]; dst[95:64] := a[95:64]; dst[127:96] := a[127:96]
* Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst", and zero the upper 3 elements. "mem_addr" does not need to be aligned on any particular boundary.
    dst[31:0] := MEM[mem_addr+31:mem_addr]; dst[127:32] := 0
* Load a single-precision (32-bit) floating-point element from memory into all elements of "dst".
    dst[31:0] := MEM[mem_addr+31:mem_addr]; dst[63:32] := MEM[mem_addr+31:mem_addr]; dst[95:64] := MEM[mem_addr+31:mem_addr]; dst[127:96] := MEM[mem_addr+31:mem_addr]
* (Duplicate of the previous entry in the source.)
* Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into "dst". "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    dst[127:0] := MEM[mem_addr+127:mem_addr]
* Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into "dst". "mem_addr" does not need to be aligned on any particular boundary.
    dst[127:0] := MEM[mem_addr+127:mem_addr]
* Load 4 single-precision (32-bit) floating-point elements from memory into "dst" in reverse order. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    dst[31:0] := MEM[mem_addr+127:mem_addr+96]; dst[63:32] := MEM[mem_addr+95:mem_addr+64]; dst[95:64] := MEM[mem_addr+63:mem_addr+32]; dst[127:96] := MEM[mem_addr+31:mem_addr]

(A C sketch of the unaligned load follows.)
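As with the stores, the aligned and unaligned 128-bit loads differ only in their alignment contract. A minimal sketch, assuming the usual `_mm_loadu_ps` name (not preserved in this extract):

    #include <xmmintrin.h>

    float sum4(const float *p) {
        /* The unaligned load tolerates any address; the aligned form may
           fault on a non-16-byte-aligned one. */
        __m128 v = _mm_loadu_ps(p);
        float out[4];
        _mm_storeu_ps(out, v);
        return out[0] + out[1] + out[2] + out[3];
    }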
SSE (xmmintrin.h), Move:

* Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
    dst[31:0] := b[31:0]; dst[127:32] := a[127:32]
* Move the upper 2 single-precision (32-bit) floating-point elements from "b" to the lower 2 elements of "dst", and copy the upper 2 elements from "a" to the upper 2 elements of "dst".
    dst[31:0] := b[95:64]; dst[63:32] := b[127:96]; dst[95:64] := a[95:64]; dst[127:96] := a[127:96]
* Move the lower 2 single-precision (32-bit) floating-point elements from "b" to the upper 2 elements of "dst", and copy the lower 2 elements from "a" to the lower 2 elements of "dst".
    dst[31:0] := a[31:0]; dst[63:32] := a[63:32]; dst[95:64] := b[31:0]; dst[127:96] := b[63:32]
SSE2 (emmintrin.h), General Support:

* Return vector of type __m128d with undefined elements.
* Return vector of type __m128i with undefined elements.
* Provide a hint to the processor that the code sequence is a spin-wait loop. This can help improve the performance and power consumption of spin-wait loops.
* Invalidate and flush the cache line that contains "p" from all levels of the cache hierarchy.
* Perform a serializing operation on all load-from-memory instructions that were issued prior to this instruction. Guarantees that every load instruction that precedes, in program order, is globally visible before any load instruction which follows the fence in program order.
* Perform a serializing operation on all load-from-memory and store-to-memory instructions that were issued prior to this instruction. Guarantees that every memory access that precedes, in program order, the memory fence instruction is globally visible before any memory instruction which follows the fence in program order.

(A C sketch of the spin-wait hint follows.)
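The spin-wait hint entry matches the standard `_mm_pause` intrinsic (the name is not preserved in this extract, so the mapping is inferred). A minimal C11 sketch of the loop it is designed for:

    #include <emmintrin.h>
    #include <stdatomic.h>

    /* Busy-wait until *flag becomes nonzero. The pause hint tells the core
       this is a spin-wait loop, reducing power and improving exit latency. */
    void spin_wait(atomic_int *flag) {
        while (atomic_load_explicit(flag, memory_order_acquire) == 0)
            _mm_pause();
    }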
SSE2 (emmintrin.h unless noted), Load:

* Load unaligned 64-bit integer from memory into the first element of "dst". (immintrin.h)
    dst[63:0] := MEM[mem_addr+63:mem_addr]; dst[MAX:64] := 0
* Load unaligned 16-bit integer from memory into the first element of "dst". (immintrin.h)
    dst[15:0] := MEM[mem_addr+15:mem_addr]; dst[MAX:16] := 0
* Load unaligned 32-bit integer from memory into the first element of "dst".
    dst[31:0] := MEM[mem_addr+31:mem_addr]; dst[MAX:32] := 0
* Load 64-bit integer from memory into the first element of "dst".
    dst[63:0] := MEM[mem_addr+63:mem_addr]; dst[MAX:64] := 0
* Load 128-bits of integer data from memory into "dst". "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    dst[127:0] := MEM[mem_addr+127:mem_addr]
* Load 128-bits of integer data from memory into "dst". "mem_addr" does not need to be aligned on any particular boundary.
    dst[127:0] := MEM[mem_addr+127:mem_addr]
* Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into "dst". "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    dst[127:0] := MEM[mem_addr+127:mem_addr]
* Load a double-precision (64-bit) floating-point element from memory into both elements of "dst".
    dst[63:0] := MEM[mem_addr+63:mem_addr]; dst[127:64] := MEM[mem_addr+63:mem_addr]
* (Duplicate of the previous entry in the source.)
* Load 2 double-precision (64-bit) floating-point elements from memory into "dst" in reverse order. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    dst[63:0] := MEM[mem_addr+127:mem_addr+64]; dst[127:64] := MEM[mem_addr+63:mem_addr]
* Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into "dst". "mem_addr" does not need to be aligned on any particular boundary.
    dst[127:0] := MEM[mem_addr+127:mem_addr]
* Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst", and zero the upper element. "mem_addr" does not need to be aligned on any particular boundary.
    dst[63:0] := MEM[mem_addr+63:mem_addr]; dst[127:64] := 0
* Load a double-precision (64-bit) floating-point element from memory into the upper element of "dst", and copy the lower element from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary.
    dst[63:0] := a[63:0]; dst[127:64] := MEM[mem_addr+63:mem_addr]
* Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst", and copy the upper element from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary.
    dst[63:0] := MEM[mem_addr+63:mem_addr]; dst[127:64] := a[127:64]
SSE2 (emmintrin.h unless noted), Store:

* Store 16-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. (immintrin.h)
    MEM[mem_addr+15:mem_addr] := a[15:0]
* Store 64-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary. (immintrin.h)
    MEM[mem_addr+63:mem_addr] := a[63:0]
* Store 32-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.
    MEM[mem_addr+31:mem_addr] := a[31:0]
* Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint. "mem_addr" does not need to be aligned on any particular boundary.
    FOR j := 0 to 15: i := j*8; IF mask[i+7] MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i] FI; ENDFOR
* Store 128-bits of integer data from "a" into memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    MEM[mem_addr+127:mem_addr] := a[127:0]
* Store 128-bits of integer data from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.
    MEM[mem_addr+127:mem_addr] := a[127:0]
* Store 64-bit integer from the first element of "a" into memory.
    MEM[mem_addr+63:mem_addr] := a[63:0]
* Store 128-bits of integer data from "a" into memory using a non-temporal memory hint. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    MEM[mem_addr+127:mem_addr] := a[127:0]
* Store 32-bit integer "a" into memory using a non-temporal hint to minimize cache pollution. If the cache line containing address "mem_addr" is already in the cache, the cache will be updated.
    MEM[mem_addr+31:mem_addr] := a[31:0]
* Store 64-bit integer "a" into memory using a non-temporal hint to minimize cache pollution. If the cache line containing address "mem_addr" is already in the cache, the cache will be updated.
    MEM[mem_addr+63:mem_addr] := a[63:0]
* Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    MEM[mem_addr+127:mem_addr] := a[127:0]
* Store the lower double-precision (64-bit) floating-point element from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.
    MEM[mem_addr+63:mem_addr] := a[63:0]
* Store the lower double-precision (64-bit) floating-point element from "a" into 2 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    MEM[mem_addr+63:mem_addr] := a[63:0]; MEM[mem_addr+127:mem_addr+64] := a[63:0]
* (Duplicate of the previous entry in the source.)
* Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    MEM[mem_addr+127:mem_addr] := a[127:0]
* Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.
    MEM[mem_addr+127:mem_addr] := a[127:0]
* Store 2 double-precision (64-bit) floating-point elements from "a" into memory in reverse order. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
    MEM[mem_addr+63:mem_addr] := a[127:64]; MEM[mem_addr+127:mem_addr+64] := a[63:0]
* Store the upper double-precision (64-bit) floating-point element from "a" into memory.
    MEM[mem_addr+63:mem_addr] := a[127:64]
* Store the lower double-precision (64-bit) floating-point element from "a" into memory.
    MEM[mem_addr+63:mem_addr] := a[63:0]

(A C sketch of a non-temporal fill follows.)
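Non-temporal stores bypass the cache, so they pair with the store fence from the SSE General Support group to make the writes visible in order. A minimal sketch, assuming the usual `_mm_stream_si128`/`_mm_sfence` names (not preserved in this extract):

    #include <emmintrin.h>
    #include <stddef.h>

    /* Fill a 16-byte-aligned buffer with zeros, bypassing the cache. */
    void zero_stream(__m128i *dst, size_t n) {
        for (size_t i = 0; i < n; i++)
            _mm_stream_si128(&dst[i], _mm_setzero_si128());
        _mm_sfence();  /* order the non-temporal stores before later stores */
    }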
SSE2 (emmintrin.h), Arithmetic:

* Add packed 8-bit integers in "a" and "b", and store the results in "dst".
    FOR j := 0 to 15: i := j*8; dst[i+7:i] := a[i+7:i] + b[i+7:i]; ENDFOR
* Add packed 16-bit integers in "a" and "b", and store the results in "dst".
    FOR j := 0 to 7: i := j*16; dst[i+15:i] := a[i+15:i] + b[i+15:i]; ENDFOR
* Add packed 32-bit integers in "a" and "b", and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := a[i+31:i] + b[i+31:i]; ENDFOR
* Add 64-bit integers "a" and "b", and store the result in "dst".
    dst[63:0] := a[63:0] + b[63:0]
* Add packed 64-bit integers in "a" and "b", and store the results in "dst".
    FOR j := 0 to 1: i := j*64; dst[i+63:i] := a[i+63:i] + b[i+63:i]; ENDFOR
* Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst".
    FOR j := 0 to 15: i := j*8; dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ); ENDFOR
* Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst".
    FOR j := 0 to 7: i := j*16; dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] ); ENDFOR
* Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst".
    FOR j := 0 to 15: i := j*8; dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] ); ENDFOR
* Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst".
    FOR j := 0 to 7: i := j*16; dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] ); ENDFOR
* Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i]); ENDFOR
* Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".
    FOR j := 0 to 7: i := j*16; tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]); dst[i+15:i] := tmp[31:16]; ENDFOR
* Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".
    FOR j := 0 to 7: i := j*16; tmp[31:0] := a[i+15:i] * b[i+15:i]; dst[i+15:i] := tmp[31:16]; ENDFOR
* Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst".
    FOR j := 0 to 7: i := j*16; tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i]); dst[i+15:i] := tmp[15:0]; ENDFOR
* Multiply the low unsigned 32-bit integers from "a" and "b", and store the unsigned 64-bit result in "dst".
    dst[63:0] := a[31:0] * b[31:0]
* Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst".
    FOR j := 0 to 1: i := j*64; dst[i+63:i] := a[i+31:i] * b[i+31:i]; ENDFOR
* Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce two unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst". (Categories: Arithmetic, Miscellaneous.)
    FOR j := 0 to 15: i := j*8; tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]); ENDFOR
    FOR j := 0 to 1
        i := j*64
        dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56]
        dst[i+63:i+16] := 0
    ENDFOR

(A C sketch of the sum-of-absolute-differences operation follows.)
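The sum-of-absolute-differences entry leaves one 16-bit partial sum in each 64-bit half of the result, so totaling all 16 bytes takes one extra step. A minimal sketch, assuming the usual `_mm_sad_epu8` name for this entry (not preserved here) plus the standard SSE2 shift/extract helpers `_mm_srli_si128` and `_mm_cvtsi128_si32`, which are not described in this section:

    #include <emmintrin.h>
    #include <stdint.h>

    /* Total absolute difference over 16 byte lanes. */
    uint32_t sad16(__m128i a, __m128i b) {
        __m128i s = _mm_sad_epu8(a, b);  /* partial sums in bits [15:0] and [79:64] */
        return (uint32_t)_mm_cvtsi128_si32(s)
             + (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(s, 8));
    }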
SSE2 (emmintrin.h), Arithmetic (continued):

* Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst".
    FOR j := 0 to 15: i := j*8; dst[i+7:i] := a[i+7:i] - b[i+7:i]; ENDFOR
* Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst".
    FOR j := 0 to 7: i := j*16; dst[i+15:i] := a[i+15:i] - b[i+15:i]; ENDFOR
* Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst".
    FOR j := 0 to 3: i := j*32; dst[i+31:i] := a[i+31:i] - b[i+31:i]; ENDFOR
* Subtract 64-bit integer "b" from 64-bit integer "a", and store the result in "dst".
    dst[63:0] := a[63:0] - b[63:0]
* Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst".
    FOR j := 0 to 1: i := j*64; dst[i+63:i] := a[i+63:i] - b[i+63:i]; ENDFOR
* Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst".
    FOR j := 0 to 15: i := j*8; dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i]); ENDFOR
* Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst".
    FOR j := 0 to 7: i := j*16; dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i]); ENDFOR
* Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst".
    FOR j := 0 to 15: i := j*8; dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i]); ENDFOR
* Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst".
    FOR j := 0 to 7: i := j*16; dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i]); ENDFOR

(A C sketch of saturating byte arithmetic follows.)
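The SaturateU8 behavior is what distinguishes these from the plain packed adds and subtracts: results clamp at the type bounds instead of wrapping. A minimal sketch, assuming the usual `_mm_adds_epu8` name for the unsigned saturating byte add (not preserved in this extract):

    #include <emmintrin.h>

    /* Brighten 16 grayscale pixels at once; saturation clamps at 255
       instead of wrapping around like ordinary byte addition would. */
    __m128i brighten(__m128i pixels, __m128i delta) {
        return _mm_adds_epu8(pixels, delta);
    }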
SSE2 (emmintrin.h), Arithmetic (continued):

* Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
    dst[63:0] := a[63:0] + b[63:0]; dst[127:64] := a[127:64]
* Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".
    FOR j := 0 to 1: i := j*64; dst[i+63:i] := a[i+63:i] + b[i+63:i]; ENDFOR
* Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
    dst[63:0] := a[63:0] / b[63:0]; dst[127:64] := a[127:64]
* Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".
    FOR j := 0 to 1: i := 64*j; dst[i+63:i] := a[i+63:i] / b[i+63:i]; ENDFOR
* Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
    dst[63:0] := a[63:0] * b[63:0]; dst[127:64] := a[127:64]
* Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".
    FOR j := 0 to 1: i := j*64; dst[i+63:i] := a[i+63:i] * b[i+63:i]; ENDFOR
* Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
    dst[63:0] := a[63:0] - b[63:0]; dst[127:64] := a[127:64]
* Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".
    FOR j := 0 to 1: i := j*64; dst[i+63:i] := a[i+63:i] - b[i+63:i]; ENDFOR
- - - - - Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 -ENDFOR - - - SSE2 -
emmintrin.h
- Probability/Statistics -
- - - - - Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 -ENDFOR - - - SSE2 -
emmintrin.h
- Probability/Statistics -
- - - - - Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Special Math Functions -
- - - - - Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Special Math Functions -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [max_float_note] - -dst[63:0] := MAX(a[63:0], b[63:0]) -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Special Math Functions -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note] - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := MAX(a[i+63:i], b[i+63:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Special Math Functions -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [min_float_note] - -dst[63:0] := MIN(a[63:0], b[63:0]) -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Special Math Functions -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note] - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := MIN(a[i+63:i], b[i+63:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Special Math Functions -
- - - - - Shift "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". - -tmp := imm8[7:0] -IF tmp > 15 - tmp := 16 -FI -dst[127:0] := a[127:0] << (tmp*8) - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst". - -tmp := imm8[7:0] -IF tmp > 15 - tmp := 16 -FI -dst[127:0] := a[127:0] << (tmp*8) - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". - -tmp := imm8[7:0] -IF tmp > 15 - tmp := 16 -FI -dst[127:0] := a[127:0] >> (tmp*8) - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0) - ELSE - dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) - ELSE - dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst". - -tmp := imm8[7:0] -IF tmp > 15 - tmp := 16 -FI -dst[127:0] := a[127:0] >> (tmp*8) - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF imm8[7:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF count[63:0] > 15 - dst[i+15:i] := 0 - ELSE - dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF imm8[7:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF count[63:0] > 31 - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF imm8[7:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF count[63:0] > 63 - dst[i+63:i] := 0 - ELSE - dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) - FI -ENDFOR - - - SSE2 -
emmintrin.h
- Shift -
- - - - - Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[127:0] := (a[127:0] AND b[127:0]) - - - SSE2 -
emmintrin.h
- Logical -
- - - - - Compute the bitwise NOT of 128 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst". - -dst[127:0] := ((NOT a[127:0]) AND b[127:0]) - - - SSE2 -
emmintrin.h
- Logical -
- - - - - Compute the bitwise OR of 128 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[127:0] := (a[127:0] OR b[127:0]) - - - SSE2 -
emmintrin.h
- Logical -
- - - - - Compute the bitwise XOR of 128 bits (representing integer data) in "a" and "b", and store the result in "dst". - -dst[127:0] := (a[127:0] XOR b[127:0]) - - - SSE2 -
emmintrin.h
- Logical -
- - - - - Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] AND b[i+63:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Logical -
- - - - - Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Logical -
- - - - - Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := a[i+63:i] OR b[i+63:i] -ENDFOR - - - SSE2 -
emmintrin.h
- Logical -
- - - - - Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := a[i+63:i] XOR b[i+63:i] -ENDFOR - - - SSE2 -
emmintrin.h
- Logical -
- - - - - Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtb instruction with the order of the operands switched. - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := ( a[i+7:i] < b[i+7:i] ) ? 0xFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtw instruction with the order of the operands switched. - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ( a[i+15:i] < b[i+15:i] ) ? 0xFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtd instruction with the order of the operands switched. - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ( a[i+31:i] < b[i+31:i] ) ? 0xFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for equality, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (a[63:0] == b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for less-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (a[63:0] < b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (a[63:0] <= b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for greater-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (a[63:0] > b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for greater-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (a[63:0] >= b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - dst[63:0] := (a[63:0] != NaN AND b[63:0] != NaN) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - dst[63:0] := (a[63:0] == NaN OR b[63:0] == NaN) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (a[63:0] != b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (!(a[63:0] < b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (!(a[63:0] <= b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (!(a[63:0] > b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := (!(a[63:0] >= b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0 -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] == b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] < b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] <= b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] > b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] >= b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in "dst". - FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (a[i+63:i] != b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (!(a[i+63:i] < b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (!(a[i+63:i] <= b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (!(a[i+63:i] > b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := (!(a[i+63:i] >= b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). - RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] == b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). - RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] < b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). - RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] <= b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). - RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] > b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). - RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] >= b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). - RETURN ( a[63:0] == NaN OR b[63:0] == NaN OR a[63:0] != b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] == b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] < b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] <= b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] > b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a[63:0] != NaN AND b[63:0] != NaN AND a[63:0] >= b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - - Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. - RETURN ( a[63:0] == NaN OR b[63:0] == NaN OR a[63:0] != b[63:0] ) ? 1 : 0 - - - SSE2 -
emmintrin.h
- Compare -
- - - - Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - m := j*64 - dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Convert -
- - - - - Convert the signed 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := Convert_Int32_To_FP64(b[31:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - SSE2 -
emmintrin.h
- Convert -
- - - - - Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := Convert_Int64_To_FP64(b[63:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - SSE2 -
emmintrin.h
- Convert -
- - - - - Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := Convert_Int64_To_FP64(b[63:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 1 - i := j*32 - m := j*64 - dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Convert -
- - - - Copy 32-bit integer "a" to the lower elements of "dst", and zero the upper elements of "dst". - -dst[31:0] := a[31:0] -dst[127:32] := 0 - - - SSE2 -
emmintrin.h
- Convert -
- - - - Copy 64-bit integer "a" to the lower element of "dst", and zero the upper element. - -dst[63:0] := a[63:0] -dst[127:64] := 0 - - - SSE2 -
emmintrin.h
- Convert -
- - - - Copy 64-bit integer "a" to the lower element of "dst", and zero the upper element. - -dst[63:0] := a[63:0] -dst[127:64] := 0 - - - SSE2 -
emmintrin.h
- Convert -
- - - - Copy the lower 32-bit integer in "a" to "dst". - -dst[31:0] := a[31:0] - - - SSE2 -
emmintrin.h
- Convert -
- - - - Copy the lower 64-bit integer in "a" to "dst". - -dst[63:0] := a[63:0] - - - SSE2 -
emmintrin.h
- Convert -
- - - - Copy the lower 64-bit integer in "a" to "dst". - -dst[63:0] := a[63:0] - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 1 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) -ENDFOR -dst[127:64] := 0 - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". - -FOR j := 0 to 1 - i := 64*j - k := 32*j - dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) -ENDFOR - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) -ENDFOR - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". - -dst[31:0] := Convert_FP64_To_Int32(a[63:0]) - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". - -dst[63:0] := Convert_FP64_To_Int64(a[63:0]) - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". - -dst[63:0] := Convert_FP64_To_Int64(a[63:0]) - - - SSE2 -
emmintrin.h
- Convert -
- - - - - Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := Convert_FP64_To_FP32(b[63:0]) -dst[127:32] := a[127:32] -dst[MAX:128] := 0 - - - SSE2 -
emmintrin.h
- Convert -
- - - - Copy the lower double-precision (64-bit) floating-point element of "a" to "dst". - -dst[63:0] := a[63:0] - - - SSE2 -
emmintrin.h
- Convert -
- - - - - Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := Convert_FP32_To_FP64(b[31:0]) -dst[127:64] := a[127:64] -dst[MAX:128] := 0 - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 1 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) -ENDFOR - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". - -dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". - -dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". - -dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) -ENDFOR - - - SSE2 -
emmintrin.h
- Convert -
- - - - Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". - -FOR j := 0 to 1 - i := 32*j - k := 64*j - dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k]) -ENDFOR - - - SSE2 -
emmintrin.h
- Convert -
- - - - - Set packed 64-bit integers in "dst" with the supplied values. - -dst[63:0] := e0 -dst[127:64] := e1 - - SSE2 -
emmintrin.h
- Set -
- - - - - Set packed 64-bit integers in "dst" with the supplied values. - -dst[63:0] := e0 -dst[127:64] := e1 - - SSE2 -
emmintrin.h
- Set -
- - - - - - - Set packed 32-bit integers in "dst" with the supplied values. - -dst[31:0] := e0 -dst[63:32] := e1 -dst[95:64] := e2 -dst[127:96] := e3 - - SSE2 -
emmintrin.h
- Set -
- - - - - - - - - - - Set packed 16-bit integers in "dst" with the supplied values. - -dst[15:0] := e0 -dst[31:16] := e1 -dst[47:32] := e2 -dst[63:48] := e3 -dst[79:64] := e4 -dst[95:80] := e5 -dst[111:96] := e6 -dst[127:112] := e7 - - SSE2 -
emmintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - Set packed 8-bit integers in "dst" with the supplied values. - -dst[7:0] := e0 -dst[15:8] := e1 -dst[23:16] := e2 -dst[31:24] := e3 -dst[39:32] := e4 -dst[47:40] := e5 -dst[55:48] := e6 -dst[63:56] := e7 -dst[71:64] := e8 -dst[79:72] := e9 -dst[87:80] := e10 -dst[95:88] := e11 -dst[103:96] := e12 -dst[111:104] := e13 -dst[119:112] := e14 -dst[127:120] := e15 - - SSE2 -
emmintrin.h
- Set -
- - - - Broadcast 64-bit integer "a" to all elements of "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR - - SSE2 -
emmintrin.h
- Set -
- - - - Broadcast 64-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastq". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR - - SSE2 -
emmintrin.h
- Set -
- - - - Broadcast 32-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastd". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := a[31:0] -ENDFOR - - SSE2 -
emmintrin.h
- Set -
- - - - Broadcast 16-bit integer "a" to all all elements of "dst". This intrinsic may generate "vpbroadcastw". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := a[15:0] -ENDFOR - - SSE2 -
emmintrin.h
- Set -
- - - - Broadcast 8-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastb". - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := a[7:0] -ENDFOR - - SSE2 -
emmintrin.h
- Set -
- - - - - Set packed 64-bit integers in "dst" with the supplied values in reverse order. - -dst[63:0] := e1 -dst[127:64] := e0 - - SSE2 -
emmintrin.h
- Set -
- - - - - - - Set packed 32-bit integers in "dst" with the supplied values in reverse order. - -dst[31:0] := e3 -dst[63:32] := e2 -dst[95:64] := e1 -dst[127:96] := e0 - - SSE2 -
emmintrin.h
- Set -
- - - - - - - - - - - Set packed 16-bit integers in "dst" with the supplied values in reverse order. - -dst[15:0] := e7 -dst[31:16] := e6 -dst[47:32] := e5 -dst[63:48] := e4 -dst[79:64] := e3 -dst[95:80] := e2 -dst[111:96] := e1 -dst[127:112] := e0 - - SSE2 -
emmintrin.h
- Set -
- - - - - - - - - - - - - - - - - - - Set packed 8-bit integers in "dst" with the supplied values in reverse order. - -dst[7:0] := e15 -dst[15:8] := e14 -dst[23:16] := e13 -dst[31:24] := e12 -dst[39:32] := e11 -dst[47:40] := e10 -dst[55:48] := e9 -dst[63:56] := e8 -dst[71:64] := e7 -dst[79:72] := e6 -dst[87:80] := e5 -dst[95:88] := e4 -dst[103:96] := e3 -dst[111:104] := e2 -dst[119:112] := e1 -dst[127:120] := e0 - - SSE2 -
emmintrin.h
- Set -
- - - Return vector of type __m128i with all elements set to zero. - -dst[MAX:0] := 0 - - - SSE2 -
emmintrin.h
- Set -
- - - - Copy double-precision (64-bit) floating-point element "a" to the lower element of "dst", and zero the upper element. - -dst[63:0] := a[63:0] -dst[127:64] := 0 - - SSE2 -
emmintrin.h
- Set -
- - - - Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR - - SSE2 -
emmintrin.h
- Set -
- - - - Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := a[63:0] -ENDFOR - - SSE2 -
emmintrin.h
- Set -
- - - - - Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values. - -dst[63:0] := e0 -dst[127:64] := e1 - - SSE2 -
emmintrin.h
- Set -
- - - - - Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. - -dst[63:0] := e1 -dst[127:64] := e0 - - SSE2 -
emmintrin.h
- Set -
- - - - Return vector of type __m128d with all elements set to zero. - -dst[MAX:0] := 0 - - - SSE2 -
emmintrin.h
- Set -
- - - - Copy the lower 64-bit integer in "a" to "dst". - -dst[63:0] := a[63:0] - - - SSE2 -
emmintrin.h
- Miscellaneous -
- - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst". - -dst[7:0] := Saturate8(a[15:0]) -dst[15:8] := Saturate8(a[31:16]) -dst[23:16] := Saturate8(a[47:32]) -dst[31:24] := Saturate8(a[63:48]) -dst[39:32] := Saturate8(a[79:64]) -dst[47:40] := Saturate8(a[95:80]) -dst[55:48] := Saturate8(a[111:96]) -dst[63:56] := Saturate8(a[127:112]) -dst[71:64] := Saturate8(b[15:0]) -dst[79:72] := Saturate8(b[31:16]) -dst[87:80] := Saturate8(b[47:32]) -dst[95:88] := Saturate8(b[63:48]) -dst[103:96] := Saturate8(b[79:64]) -dst[111:104] := Saturate8(b[95:80]) -dst[119:112] := Saturate8(b[111:96]) -dst[127:120] := Saturate8(b[127:112]) - - - SSE2 -
emmintrin.h
- Miscellaneous -
- - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst". - -dst[15:0] := Saturate16(a[31:0]) -dst[31:16] := Saturate16(a[63:32]) -dst[47:32] := Saturate16(a[95:64]) -dst[63:48] := Saturate16(a[127:96]) -dst[79:64] := Saturate16(b[31:0]) -dst[95:80] := Saturate16(b[63:32]) -dst[111:96] := Saturate16(b[95:64]) -dst[127:112] := Saturate16(b[127:96]) - - - SSE2 -
emmintrin.h
- Miscellaneous -
- - - - - Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst". - -dst[7:0] := SaturateU8(a[15:0]) -dst[15:8] := SaturateU8(a[31:16]) -dst[23:16] := SaturateU8(a[47:32]) -dst[31:24] := SaturateU8(a[63:48]) -dst[39:32] := SaturateU8(a[79:64]) -dst[47:40] := SaturateU8(a[95:80]) -dst[55:48] := SaturateU8(a[111:96]) -dst[63:56] := SaturateU8(a[127:112]) -dst[71:64] := SaturateU8(b[15:0]) -dst[79:72] := SaturateU8(b[31:16]) -dst[87:80] := SaturateU8(b[47:32]) -dst[95:88] := SaturateU8(b[63:48]) -dst[103:96] := SaturateU8(b[79:64]) -dst[111:104] := SaturateU8(b[95:80]) -dst[119:112] := SaturateU8(b[111:96]) -dst[127:120] := SaturateU8(b[127:112]) - - - SSE2 -
emmintrin.h
- Miscellaneous -
- - - - Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst". - -FOR j := 0 to 15 - i := j*8 - dst[j] := a[i+7] -ENDFOR -dst[MAX:16] := 0 - - - SSE2 -
emmintrin.h
- Miscellaneous -
- - - - Set each bit of mask "dst" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in "a". - -FOR j := 0 to 1 - i := j*64 - IF a[i+63] - dst[j] := 1 - ELSE - dst[j] := 0 - FI -ENDFOR -dst[MAX:2] := 0 - - - SSE2 -
emmintrin.h
- Miscellaneous -
- - - - Copy the 64-bit integer "a" to the lower element of "dst", and zero the upper element. - -dst[63:0] := a[63:0] -dst[127:64] := 0 - - - SSE2 -
emmintrin.h
- Move -
- - - - Copy the lower 64-bit integer in "a" to the lower element of "dst", and zero the upper element. - -dst[63:0] := a[63:0] -dst[127:64] := 0 - - - SSE2 -
emmintrin.h
- Move -
- - - - - Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := b[63:0] -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Move -
- - - - - Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". - -dst[15:0] := (a[127:0] >> (imm8[2:0] * 16))[15:0] -dst[31:16] := 0 - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8". - -dst[127:0] := a[127:0] -sel := imm8[2:0]*16 -dst[sel+15:sel] := i[15:0] - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst". - -DEFINE SELECT4(src, control) { - CASE(control[1:0]) OF - 0: tmp[31:0] := src[31:0] - 1: tmp[31:0] := src[63:32] - 2: tmp[31:0] := src[95:64] - 3: tmp[31:0] := src[127:96] - ESAC - RETURN tmp[31:0] -} -dst[31:0] := SELECT4(a[127:0], imm8[1:0]) -dst[63:32] := SELECT4(a[127:0], imm8[3:2]) -dst[95:64] := SELECT4(a[127:0], imm8[5:4]) -dst[127:96] := SELECT4(a[127:0], imm8[7:6]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from from "a" to "dst". - -dst[63:0] := a[63:0] -dst[79:64] := (a >> (imm8[1:0] * 16))[79:64] -dst[95:80] := (a >> (imm8[3:2] * 16))[79:64] -dst[111:96] := (a >> (imm8[5:4] * 16))[79:64] -dst[127:112] := (a >> (imm8[7:6] * 16))[79:64] - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from from "a" to "dst". - -dst[15:0] := (a >> (imm8[1:0] * 16))[15:0] -dst[31:16] := (a >> (imm8[3:2] * 16))[15:0] -dst[47:32] := (a >> (imm8[5:4] * 16))[15:0] -dst[63:48] := (a >> (imm8[7:6] * 16))[15:0] -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[71:64] - dst[15:8] := src2[71:64] - dst[23:16] := src1[79:72] - dst[31:24] := src2[79:72] - dst[39:32] := src1[87:80] - dst[47:40] := src2[87:80] - dst[55:48] := src1[95:88] - dst[63:56] := src2[95:88] - dst[71:64] := src1[103:96] - dst[79:72] := src2[103:96] - dst[87:80] := src1[111:104] - dst[95:88] := src2[111:104] - dst[103:96] := src1[119:112] - dst[111:104] := src2[119:112] - dst[119:112] := src1[127:120] - dst[127:120] := src2[127:120] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[79:64] - dst[31:16] := src2[79:64] - dst[47:32] := src1[95:80] - dst[63:48] := src2[95:80] - dst[79:64] := src1[111:96] - dst[95:80] := src2[111:96] - dst[111:96] := src1[127:112] - dst[127:112] := src2[127:112] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[95:64] - dst[63:32] := src2[95:64] - dst[95:64] := src1[127:96] - dst[127:96] := src2[127:96] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) { - dst[7:0] := src1[7:0] - dst[15:8] := src2[7:0] - dst[23:16] := src1[15:8] - dst[31:24] := src2[15:8] - dst[39:32] := src1[23:16] - dst[47:40] := src2[23:16] - dst[55:48] := src1[31:24] - dst[63:56] := src2[31:24] - dst[71:64] := src1[39:32] - dst[79:72] := src2[39:32] - dst[87:80] := src1[47:40] - dst[95:88] := src2[47:40] - dst[103:96] := src1[55:48] - dst[111:104] := src2[55:48] - dst[119:112] := src1[63:56] - dst[127:120] := src2[63:56] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) { - dst[15:0] := src1[15:0] - dst[31:16] := src2[15:0] - dst[47:32] := src1[31:16] - dst[63:48] := src2[31:16] - dst[79:64] := src1[47:32] - dst[95:80] := src2[47:32] - dst[111:96] := src1[63:48] - dst[127:112] := src2[63:48] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) { - dst[31:0] := src1[31:0] - dst[63:32] := src2[31:0] - dst[95:64] := src1[63:32] - dst[127:96] := src2[63:32] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Unpack and interleave 64-bit integers from the low half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[63:0] - dst[127:64] := src2[63:0] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[127:64] - dst[127:64] := src2[127:64] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst". - -DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { - dst[63:0] := src1[63:0] - dst[127:64] := src2[63:0] - RETURN dst[127:0] -} -dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - - Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst". - -dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] -dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] - - - SSE2 -
emmintrin.h
- Swizzle -
- - - - - Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := SQRT(b[63:0]) -dst[127:64] := a[127:64] - - - SSE2 -
emmintrin.h
- Elementary Math Functions -
- - - - Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := SQRT(a[i+63:i]) -ENDFOR - - - SSE2 -
emmintrin.h
- Elementary Math Functions -
- - - - Cast vector of type __m128d to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - SSE2 -
emmintrin.h
- Cast -
- - - - Cast vector of type __m128d to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - SSE2 -
emmintrin.h
- Cast -
- - - - Cast vector of type __m128 to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - SSE2 -
emmintrin.h
- Cast -
- - - - Cast vector of type __m128 to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - SSE2 -
emmintrin.h
- Cast -
- - - - Cast vector of type __m128i to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - SSE2 -
emmintrin.h
- Cast -
- - - - Cast vector of type __m128i to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. - SSE2 -
emmintrin.h
- Cast -
- - - - - - - Alternatively add and subtract packed single-precision (32-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF ((j & 1) == 0) - dst[i+31:i] := a[i+31:i] - b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] + b[i+31:i] - FI -ENDFOR - - - SSE3 -
pmmintrin.h
- Arithmetic -
- - - - - Alternatively add and subtract packed double-precision (64-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF ((j & 1) == 0) - dst[i+63:i] := a[i+63:i] - b[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] + b[i+63:i] - FI -ENDFOR - - - SSE3 -
pmmintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". - -dst[63:0] := a[127:64] + a[63:0] -dst[127:64] := b[127:64] + b[63:0] - - - SSE3 -
pmmintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". - -dst[31:0] := a[63:32] + a[31:0] -dst[63:32] := a[127:96] + a[95:64] -dst[95:64] := b[63:32] + b[31:0] -dst[127:96] := b[127:96] + b[95:64] - - - SSE3 -
pmmintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst". - -dst[63:0] := a[63:0] - a[127:64] -dst[127:64] := b[63:0] - b[127:64] - - - SSE3 -
pmmintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst". - -dst[31:0] := a[31:0] - a[63:32] -dst[63:32] := a[95:64] - a[127:96] -dst[95:64] := b[31:0] - b[63:32] -dst[127:96] := b[95:64] - b[127:96] - - - SSE3 -
pmmintrin.h
- Arithmetic -
- - - - Load 128-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm_loadu_si128" when the data crosses a cache line boundary. - -dst[127:0] := MEM[mem_addr+127:mem_addr] - - - SSE3 -
pmmintrin.h
- Load -
- - - - Load a double-precision (64-bit) floating-point element from memory into both elements of "dst". - -dst[63:0] := MEM[mem_addr+63:mem_addr] -dst[127:64] := MEM[mem_addr+63:mem_addr] - - - SSE3 -
pmmintrin.h
- Load -
- - - - Duplicate the low double-precision (64-bit) floating-point element from "a", and store the results in "dst". - -dst[63:0] := a[63:0] -dst[127:64] := a[63:0] - - - SSE3 -
pmmintrin.h
- Move -
- - - - Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". - -dst[31:0] := a[63:32] -dst[63:32] := a[63:32] -dst[95:64] := a[127:96] -dst[127:96] := a[127:96] - - - SSE3 -
pmmintrin.h
- Move -
- - - - Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst". - -dst[31:0] := a[31:0] -dst[63:32] := a[31:0] -dst[95:64] := a[95:64] -dst[127:96] := a[95:64] - - - SSE3 -
pmmintrin.h
- Move -
- - - - - - - - Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF imm8[j] - dst[i+63:i] := b[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - - Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF imm8[j] - dst[i+31:i] := b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - - Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - IF mask[i+63] - dst[i+63:i] := b[i+63:i] - ELSE - dst[i+63:i] := a[i+63:i] - FI -ENDFOR - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - - Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst". - -FOR j := 0 to 3 - i := j*32 - IF mask[i+31] - dst[i+31:i] := b[i+31:i] - ELSE - dst[i+31:i] := a[i+31:i] - FI -ENDFOR - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - - Blend packed 8-bit integers from "a" and "b" using "mask", and store the results in "dst". - -FOR j := 0 to 15 - i := j*8 - IF mask[i+7] - dst[i+7:i] := b[i+7:i] - ELSE - dst[i+7:i] := a[i+7:i] - FI -ENDFOR - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - - Blend packed 16-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst". - -FOR j := 0 to 7 - i := j*16 - IF imm8[j] - dst[i+15:i] := b[i+15:i] - ELSE - dst[i+15:i] := a[i+15:i] - FI -ENDFOR - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - Extract a single-precision (32-bit) floating-point element from "a", selected with "imm8", and store the result in "dst". - -dst[31:0] := (a[127:0] >> (imm8[1:0] * 32))[31:0] - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - Extract an 8-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst". - -dst[7:0] := (a[127:0] >> (imm8[3:0] * 8))[7:0] -dst[31:8] := 0 - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - Extract a 32-bit integer from "a", selected with "imm8", and store the result in "dst". - -dst[31:0] := (a[127:0] >> (imm8[1:0] * 32))[31:0] - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - Extract a 64-bit integer from "a", selected with "imm8", and store the result in "dst". - -dst[63:0] := (a[127:0] >> (imm8[0] * 64))[63:0] - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - - Copy "a" to "tmp", then insert a single-precision (32-bit) floating-point element from "b" into "tmp" using the control in "imm8". Store "tmp" to "dst" using the mask in "imm8" (elements are zeroed out when the corresponding bit is set). - -tmp2[127:0] := a[127:0] -CASE (imm8[7:6]) OF -0: tmp1[31:0] := b[31:0] -1: tmp1[31:0] := b[63:32] -2: tmp1[31:0] := b[95:64] -3: tmp1[31:0] := b[127:96] -ESAC -CASE (imm8[5:4]) OF -0: tmp2[31:0] := tmp1[31:0] -1: tmp2[63:32] := tmp1[31:0] -2: tmp2[95:64] := tmp1[31:0] -3: tmp2[127:96] := tmp1[31:0] -ESAC -FOR j := 0 to 3 - i := j*32 - IF imm8[j%8] - dst[i+31:i] := 0 - ELSE - dst[i+31:i] := tmp2[i+31:i] - FI -ENDFOR - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", and insert the lower 8-bit integer from "i" into "dst" at the location specified by "imm8". - -dst[127:0] := a[127:0] -sel := imm8[3:0]*8 -dst[sel+7:sel] := i[7:0] - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", and insert the 32-bit integer "i" into "dst" at the location specified by "imm8". - -dst[127:0] := a[127:0] -sel := imm8[1:0]*32 -dst[sel+31:sel] := i[31:0] - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - - Copy "a" to "dst", and insert the 64-bit integer "i" into "dst" at the location specified by "imm8". - -dst[127:0] := a[127:0] -sel := imm8[0]*64 -dst[sel+63:sel] := i[63:0] - - - SSE4.1 -
smmintrin.h
- Swizzle -
- - - - - - Conditionally multiply the packed double-precision (64-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8". - -DEFINE DP(a[127:0], b[127:0], imm8[7:0]) { - FOR j := 0 to 1 - i := j*64 - IF imm8[(4+j)%8] - temp[i+63:i] := a[i+63:i] * b[i+63:i] - ELSE - temp[i+63:i] := 0.0 - FI - ENDFOR - - sum[63:0] := temp[127:64] + temp[63:0] - - FOR j := 0 to 1 - i := j*64 - IF imm8[j%8] - tmpdst[i+63:i] := sum[63:0] - ELSE - tmpdst[i+63:i] := 0.0 - FI - ENDFOR - RETURN tmpdst[127:0] -} -dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0]) - - - SSE4.1 -
smmintrin.h
- Arithmetic -
- - - - - - Conditionally multiply the packed single-precision (32-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8". - -DEFINE DP(a[127:0], b[127:0], imm8[7:0]) { - FOR j := 0 to 3 - i := j*32 - IF imm8[(4+j)%8] - temp[i+31:i] := a[i+31:i] * b[i+31:i] - ELSE - temp[i+31:i] := 0 - FI - ENDFOR - - sum[31:0] := (temp[127:96] + temp[95:64]) + (temp[63:32] + temp[31:0]) - - FOR j := 0 to 3 - i := j*32 - IF imm8[j%8] - tmpdst[i+31:i] := sum[31:0] - ELSE - tmpdst[i+31:i] := 0 - FI - ENDFOR - RETURN tmpdst[127:0] -} -dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0]) - - - SSE4.1 -
smmintrin.h
- Arithmetic -
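The conditional dot product is easiest to see with imm8 = 0xF1: multiply all four lanes and write the sum to lane 0 only. A hedged C sketch (assuming `_mm_dp_ps`):

```c
#include <smmintrin.h> // SSE4.1
#include <stdio.h>

int main(void) {
    __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
    __m128 b = _mm_setr_ps(5.0f, 6.0f, 7.0f, 8.0f);
    // imm8 high nibble 0xF: use all four products; low nibble 0x1: store the
    // sum only in lane 0 (remaining lanes are zeroed).
    __m128 dp = _mm_dp_ps(a, b, 0xF1);
    printf("%g\n", _mm_cvtss_f32(dp)); // 1*5 + 2*6 + 3*7 + 4*8 = 70
    return 0;
}
```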
- - - - - Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Arithmetic -
- - - - - Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst". - -FOR j := 0 to 3 - i := j*32 - tmp[63:0] := a[i+31:i] * b[i+31:i] - dst[i+31:i] := tmp[31:0] -ENDFOR - - - SSE4.1 -
smmintrin.h
- Arithmetic -
- - Miscellaneous - - - - - Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst". - Eight SADs are performed using one quadruplet from "b" and eight quadruplets from "a". One quadruplet is selected from "b" starting at the offset specified in "imm8". Eight quadruplets are formed from sequential 8-bit integers selected from "a" starting at the offset specified in "imm8". - -DEFINE MPSADBW(a[127:0], b[127:0], imm8[2:0]) { - a_offset := imm8[2]*32 - b_offset := imm8[1:0]*32 - FOR j := 0 to 7 - i := j*8 - k := a_offset+i - l := b_offset - tmp[i*2+15:i*2] := ABS(Signed(a[k+7:k] - b[l+7:l])) + ABS(Signed(a[k+15:k+8] - b[l+15:l+8])) + \ - ABS(Signed(a[k+23:k+16] - b[l+23:l+16])) + ABS(Signed(a[k+31:k+24] - b[l+31:l+24])) - ENDFOR - RETURN tmp[127:0] -} -dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0]) - - SSE4.1 - 
smmintrin.h
- Arithmetic -
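With imm8 = 0 and an all-zero "b", each 16-bit SAD result above reduces to the plain sum of a sliding four-byte window of "a", which makes the windowing easy to check. A sketch (assuming `_mm_mpsadbw_epu8`):

```c
#include <smmintrin.h> // SSE4.1
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                              8, 9, 10, 11, 12, 13, 14, 15);
    __m128i b = _mm_setzero_si128();
    // imm8 = 0: quadruplet b[0..3] (all zero) against windows a[j..j+3],
    // j = 0..7, so result j is the sum of that four-byte window of a.
    __m128i sad = _mm_mpsadbw_epu8(a, b, 0);
    short out[8];
    _mm_storeu_si128((__m128i *)out, sad);
    for (int j = 0; j < 8; j++)
        printf("%d ", out[j]); // 6 10 14 18 22 26 30 34
    printf("\n");
    return 0;
}
```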
- - - - - Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := MAX(a[i+31:i], b[i+31:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := MIN(a[i+31:i], b[i+31:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
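The packed min/max pairs above compose into a branchless clamp; a C sketch using the unsigned 16-bit variants (presumably `_mm_max_epu16`/`_mm_min_epu16`):

```c
#include <smmintrin.h> // SSE4.1
#include <stdio.h>

int main(void) {
    __m128i v  = _mm_setr_epi16(5, 100, 900, 42, 0, 7, 1000, 300);
    __m128i lo = _mm_set1_epi16(10);
    __m128i hi = _mm_set1_epi16(500);
    // Clamp every unsigned 16-bit lane into [10, 500] without branches.
    __m128i r = _mm_min_epu16(_mm_max_epu16(v, lo), hi);
    unsigned short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    for (int i = 0; i < 8; i++)
        printf("%u ", out[i]); // 10 100 500 42 10 10 500 300
    printf("\n");
    return 0;
}
```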
- - - - - Round the packed double-precision (64-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed double-precision floating-point elements in "dst". - [round_note] - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ROUND(a[i+63:i], rounding) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := FLOOR(a[i+63:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := CEIL(a[i+63:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Round the packed single-precision (32-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed single-precision floating-point elements in "dst". - [round_note] - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ROUND(a[i+31:i], rounding) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
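A sketch of the packed rounding with an explicit rounding mode (assuming `_mm_round_ps` and the standard `_MM_FROUND_*` constants); note that round-to-nearest resolves ties to even:

```c
#include <smmintrin.h> // SSE4.1
#include <stdio.h>

int main(void) {
    __m128 v = _mm_setr_ps(0.5f, 1.5f, 2.5f, -1.5f);
    // Round to nearest (ties to even) and suppress precision exceptions.
    __m128 r = _mm_round_ps(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    float out[4];
    _mm_storeu_ps(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // 0 2 2 -2
    return 0;
}
```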
- - - - Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := FLOOR(a[i+31:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := CEIL(a[i+31:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - - Round the lower double-precision (64-bit) floating-point element in "b" using the "rounding" parameter, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - [round_note] - -dst[63:0] := ROUND(b[63:0], rounding) -dst[127:64] := a[127:64] - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Round the lower double-precision (64-bit) floating-point element in "b" down to an integer value, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := FLOOR(b[63:0]) -dst[127:64] := a[127:64] - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Round the lower double-precision (64-bit) floating-point element in "b" up to an integer value, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". - -dst[63:0] := CEIL(b[63:0]) -dst[127:64] := a[127:64] - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - - Round the lower single-precision (32-bit) floating-point element in "b" using the "rounding" parameter, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - [round_note] - -dst[31:0] := ROUND(b[31:0], rounding) -dst[127:32] := a[127:32] - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Round the lower single-precision (32-bit) floating-point element in "b" down to an integer value, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := FLOOR(b[31:0]) -dst[127:32] := a[127:32] - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - - - - Round the lower single-precision (32-bit) floating-point element in "b" up to an integer value, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". - -dst[31:0] := CEIL(b[31:0]) -dst[127:32] := a[127:32] - - - SSE4.1 -
smmintrin.h
- Special Math Functions -
- - Miscellaneous - - - - Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst". - -dst[15:0] := SaturateU16(a[31:0]) -dst[31:16] := SaturateU16(a[63:32]) -dst[47:32] := SaturateU16(a[95:64]) -dst[63:48] := SaturateU16(a[127:96]) -dst[79:64] := SaturateU16(b[31:0]) -dst[95:80] := SaturateU16(b[63:32]) -dst[111:96] := SaturateU16(b[95:64]) -dst[127:112] := SaturateU16(b[127:96]) - - - SSE4.1 -
smmintrin.h
- Convert -
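Unsigned saturation means negative inputs clamp to 0 and anything above 65535 clamps to 65535; a sketch (assuming `_mm_packus_epi32`):

```c
#include <smmintrin.h> // SSE4.1
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi32(-5, 0, 1234, 70000);
    __m128i b = _mm_setr_epi32(1, 65535, 65536, -1);
    __m128i p = _mm_packus_epi32(a, b); // a's lanes land in the low half
    unsigned short out[8];
    _mm_storeu_si128((__m128i *)out, p);
    for (int i = 0; i < 8; i++)
        printf("%u ", out[i]); // 0 0 1234 65535 1 65535 65535 0
    printf("\n");
    return 0;
}
```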
- - - - Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - l := j*16 - dst[l+15:l] := SignExtend16(a[i+7:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
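A sketch of the 8-to-16-bit sign extension (assuming `_mm_cvtepi8_epi16`); only the low 8 bytes of the source participate:

```c
#include <smmintrin.h> // SSE4.1
#include <stdio.h>

int main(void) {
    __m128i bytes = _mm_setr_epi8(-1, 2, -3, 4, 0, 0, 0, 0,
                                  0, 0, 0, 0, 0, 0, 0, 0);
    __m128i words = _mm_cvtepi8_epi16(bytes); // sign-extends the low 8 bytes
    short out[8];
    _mm_storeu_si128((__m128i *)out, words);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); // -1 2 -3 4
    return 0;
}
```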
- - - - Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 8*j - dst[i+31:i] := SignExtend32(a[k+7:k]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
- - - - Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := 64*j - k := 8*j - dst[i+63:i] := SignExtend64(a[k+7:k]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
- - - - Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 16*j - dst[i+31:i] := SignExtend32(a[k+15:k]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
- - - - Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := 64*j - k := 16*j - dst[i+63:i] := SignExtend64(a[k+15:k]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
- - - - Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := 64*j - k := 32*j - dst[i+63:i] := SignExtend64(a[k+31:k]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
- - - - Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - l := j*16 - dst[l+15:l] := ZeroExtend16(a[i+7:i]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
- - - - Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 8*j - dst[i+31:i] := ZeroExtend32(a[k+7:k]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
- - - - Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := 64*j - k := 8*j - dst[i+63:i] := ZeroExtend64(a[k+7:k]) -ENDFOR - - SSE4.1 - 
smmintrin.h
- Convert -
- - - - Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst". - -FOR j := 0 to 3 - i := 32*j - k := 16*j - dst[i+31:i] := ZeroExtend32(a[k+15:k]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
- - - - Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := 64*j - k := 16*j - dst[i+63:i] := ZeroExtend64(a[k+15:k]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
- - - - Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst". - -FOR j := 0 to 1 - i := 64*j - k := 32*j - dst[i+63:i] := ZeroExtend64(a[k+31:k]) -ENDFOR - - - SSE4.1 -
smmintrin.h
- Convert -
- - - - - Compare packed 64-bit integers in "a" and "b" for equality, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ( a[i+63:i] == b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE4.1 -
smmintrin.h
- Compare -
- - - - - Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "ZF" value. - -IF ((a[127:0] AND b[127:0]) == 0) - ZF := 1 -ELSE - ZF := 0 -FI -IF (((NOT a[127:0]) AND b[127:0]) == 0) - CF := 1 -ELSE - CF := 0 -FI -RETURN ZF - - - SSE4.1 -
smmintrin.h
- Logical -
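The ZF-returning test maps naturally onto "are these bits all clear?" checks; a sketch (assuming `_mm_testz_si128`):

```c
#include <smmintrin.h> // SSE4.1
#include <stdio.h>

int main(void) {
    __m128i v    = _mm_setr_epi32(0, 0, 0, 8);
    __m128i mask = _mm_setr_epi32(-1, -1, -1, 0); // test only the low 96 bits
    // (v AND mask) == 0, so 1 (ZF) is returned even though v itself is nonzero.
    printf("%d\n", _mm_testz_si128(v, mask)); // 1
    return 0;
}
```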
- - - - - Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "CF" value. - -IF ((a[127:0] AND b[127:0]) == 0) - ZF := 1 -ELSE - ZF := 0 -FI -IF (((NOT a[127:0]) AND b[127:0]) == 0) - CF := 1 -ELSE - CF := 0 -FI -RETURN CF - - - SSE4.1 -
smmintrin.h
- Logical -
- - - - - Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. - -IF ((a[127:0] AND b[127:0]) == 0) - ZF := 1 -ELSE - ZF := 0 -FI -IF (((NOT a[127:0]) AND b[127:0]) == 0) - CF := 1 -ELSE - CF := 0 -FI -IF (ZF == 0 && CF == 0) - dst := 1 -ELSE - dst := 0 -FI - - - SSE4.1 -
smmintrin.h
- Logical -
- - - - - Compute the bitwise AND of 128 bits (representing integer data) in "a" and "mask", and return 1 if the result is zero, otherwise return 0. - -IF ((a[127:0] AND mask[127:0]) == 0) - ZF := 1 -ELSE - ZF := 0 -FI -dst := ZF - - - SSE4.1 -
smmintrin.h
- Logical -
- - - - - Compute the bitwise AND of 128 bits (representing integer data) in "a" and "mask", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "mask", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0. - -IF ((a[127:0] AND mask[127:0]) == 0) - ZF := 1 -ELSE - ZF := 0 -FI -IF (((NOT a[127:0]) AND mask[127:0]) == 0) - CF := 1 -ELSE - CF := 0 -FI -IF (ZF == 0 && CF == 0) - dst := 1 -ELSE - dst := 0 -FI - - - SSE4.1 -
smmintrin.h
- Logical -
- - - - Compute the bitwise NOT of "a" and then AND with a 128-bit vector containing all 1's, and return 1 if the result is zero, otherwise return 0. - -FOR j := 0 to 127 - tmp[j] := 1 -ENDFOR -IF (((NOT a[127:0]) AND tmp[127:0]) == 0) - CF := 1 -ELSE - CF := 0 -FI -dst := CF - - - - SSE4.1 -
smmintrin.h
- Logical -
- - - - Horizontally compute the minimum amongst the packed unsigned 16-bit integers in "a", store the minimum and index in "dst", and zero the remaining bits in "dst". - -index[2:0] := 0 -min[15:0] := a[15:0] -FOR j := 0 to 7 - i := j*16 - IF a[i+15:i] < min[15:0] - index[2:0] := j - min[15:0] := a[i+15:i] - FI -ENDFOR -dst[15:0] := min[15:0] -dst[18:16] := index[2:0] -dst[127:19] := 0 - - - SSE4.1 -
smmintrin.h
- Miscellaneous -
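The packed result puts the minimum in lane 0 and its index in bits [18:16]; a sketch of unpacking both (assuming `_mm_minpos_epu16`):

```c
#include <smmintrin.h> // SSE4.1
#include <stdio.h>

int main(void) {
    __m128i v = _mm_setr_epi16(9, 4, 7, 4, 30000, 11, 8, 2);
    __m128i r = _mm_minpos_epu16(v);
    int min   = _mm_extract_epi16(r, 0);       // dst[15:0]  = minimum
    int index = _mm_extract_epi16(r, 1) & 0x7; // dst[18:16] = its index
    printf("min=%d at index %d\n", min, index); // min=2 at index 7
    return 0;
}
```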
- - - - Load 128 bits of integer data from memory into "dst" using a non-temporal memory hint. - "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated. - -dst[127:0] := MEM[mem_addr+127:mem_addr] - - SSE4.1 - 
smmintrin.h
- Load -
- - - - - - - - Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and store the generated mask in "dst". - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -BoolRes := 0 -// compare all characters -aInvalid := 0 -bInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - FOR j := 0 to UpperBound - n := j*size - BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 - - // invalidate characters after EOS - IF a[m+size-1:m] == 0 - aInvalid := 1 - FI - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI - - // override comparisons for invalid characters - CASE (imm8[3:2]) OF - 0: // equal any - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 1: // ranges - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 2: // equal each - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - 3: // equal ordered - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 1 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - ESAC - ENDFOR -ENDFOR -// aggregate results -CASE (imm8[3:2]) OF -0: // equal any - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] - ENDFOR - ENDFOR -1: // ranges - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) - j += 2 - ENDFOR - ENDFOR -2: // equal each - IntRes1 := 0 - FOR i := 0 to UpperBound - IntRes1[i] := BoolRes.word[i].bit[i] - ENDFOR -3: // equal ordered - IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) - FOR i := 0 to UpperBound - k := i - FOR j := 0 to UpperBound-i - IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] - k := k+1 - ENDFOR - ENDFOR -ESAC -// optionally negate results -bInvalid := 0 -FOR i := 0 to UpperBound - IF imm8[4] - IF imm8[5] // only negate valid - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI - IF bInvalid // invalid, don't negate - IntRes2[i] := IntRes1[i] - ELSE // valid, negate - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // negate all - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // don't negate - IntRes2[i] := IntRes1[i] - FI -ENDFOR -// output -IF imm8[6] // byte / word mask - FOR i := 0 to UpperBound - j := i*size - IF IntRes2[i] - dst[j+size-1:j] := (imm8[0] ? 0xFF : 0xFFFF) - ELSE - dst[j+size-1:j] := 0 - FI - ENDFOR -ELSE // bit mask - dst[UpperBound:0] := IntRes2[UpperBound:0] - dst[127:UpperBound+1] := 0 -FI - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and store the generated index in "dst". - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -BoolRes := 0 -// compare all characters -aInvalid := 0 -bInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - FOR j := 0 to UpperBound - n := j*size - BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 - - // invalidate characters after EOS - IF a[m+size-1:m] == 0 - aInvalid := 1 - FI - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI - - // override comparisons for invalid characters - CASE (imm8[3:2]) OF - 0: // equal any - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 1: // ranges - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 2: // equal each - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - 3: // equal ordered - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 1 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - ESAC - ENDFOR -ENDFOR -// aggregate results -CASE (imm8[3:2]) OF -0: // equal any - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] - ENDFOR - ENDFOR -1: // ranges - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) - j += 2 - ENDFOR - ENDFOR -2: // equal each - IntRes1 := 0 - FOR i := 0 to UpperBound - IntRes1[i] := BoolRes.word[i].bit[i] - ENDFOR -3: // equal ordered - IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) - FOR i := 0 to UpperBound - k := i - FOR j := 0 to UpperBound-i - IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] - k := k+1 - ENDFOR - ENDFOR -ESAC -// optionally negate results -bInvalid := 0 -FOR i := 0 to UpperBound - IF imm8[4] - IF imm8[5] // only negate valid - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI - IF bInvalid // invalid, don't negate - IntRes2[i] := IntRes1[i] - ELSE // valid, negate - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // negate all - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // don't negate - IntRes2[i] := IntRes1[i] - FI -ENDFOR -// output -IF imm8[6] // most significant bit - tmp := UpperBound - dst := tmp - DO WHILE ((tmp >= 0) AND IntRes2[tmp] == 0) - tmp := tmp - 1 - dst := tmp - OD -ELSE // least significant bit - tmp := 0 - dst := tmp - DO WHILE ((tmp <= UpperBound) AND IntRes2[tmp] == 0) - tmp := tmp + 1 - dst := tmp - OD -FI - - SSE4.2 - 
nmmintrin.h
- String Compare -
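A sketch of the implicit-length index search in "equal any" mode (assuming `_mm_cmpistri` and the `_SIDD_*` control constants from `nmmintrin.h`): it returns the index of the first character of the second operand that matches any character of the first, or 16 if none matches before a terminating NUL.

```c
#include <nmmintrin.h> // SSE4.2
#include <stdio.h>

int main(void) {
    // Implicit lengths: comparison stops at the first NUL in either operand.
    __m128i str = _mm_loadu_si128((const __m128i *)"hello, world!\0\0\0");
    __m128i set = _mm_loadu_si128((const __m128i *)",!\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
    int idx = _mm_cmpistri(set, str,
                           _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
                           _SIDD_LEAST_SIGNIFICANT);
    printf("%d\n", idx); // 5: the comma is the first character from the set
    return 0;
}
```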
- - - - - - Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if any character in "b" was null, and 0 otherwise. - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -bInvalid := 0 -FOR j := 0 to UpperBound - n := j*size - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI -ENDFOR -dst := bInvalid - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if the resulting mask was non-zero, and 0 otherwise. - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -BoolRes := 0 -// compare all characters -aInvalid := 0 -bInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - FOR j := 0 to UpperBound - n := j*size - BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 - - // invalidate characters after EOS - IF a[m+size-1:m] == 0 - aInvalid := 1 - FI - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI - - // override comparisons for invalid characters - CASE (imm8[3:2]) OF - 0: // equal any - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 1: // ranges - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 2: // equal each - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - 3: // equal ordered - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 1 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - ESAC - ENDFOR -ENDFOR -// aggregate results -CASE (imm8[3:2]) OF -0: // equal any - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] - ENDFOR - ENDFOR -1: // ranges - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) - j += 2 - ENDFOR - ENDFOR -2: // equal each - IntRes1 := 0 - FOR i := 0 to UpperBound - IntRes1[i] := BoolRes.word[i].bit[i] - ENDFOR -3: // equal ordered - IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) - FOR i := 0 to UpperBound - k := i - FOR j := 0 to UpperBound-i - IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] - k := k+1 - ENDFOR - ENDFOR -ESAC -// optionally negate results -bInvalid := 0 -FOR i := 0 to UpperBound - IF imm8[4] - IF imm8[5] // only negate valid - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI - IF bInvalid // invalid, don't negate - IntRes2[i] := IntRes1[i] - ELSE // valid, negate - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // negate all - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // don't negate - IntRes2[i] := IntRes1[i] - FI -ENDFOR -// output -dst := (IntRes2 != 0) - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if any character in "a" was null, and 0 otherwise. - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -aInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - IF a[m+size-1:m] == 0 - aInvalid := 1 - FI -ENDFOR -dst := aInvalid - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns bit 0 of the resulting bit mask. - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -BoolRes := 0 -// compare all characters -aInvalid := 0 -bInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - FOR j := 0 to UpperBound - n := j*size - BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 - - // invalidate characters after EOS - IF a[m+size-1:m] == 0 - aInvalid := 1 - FI - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI - - // override comparisons for invalid characters - CASE (imm8[3:2]) OF - 0: // equal any - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 1: // ranges - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 2: // equal each - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - 3: // equal ordered - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 1 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - ESAC - ENDFOR -ENDFOR -// aggregate results -CASE (imm8[3:2]) OF -0: // equal any - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] - ENDFOR - ENDFOR -1: // ranges - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) - j += 2 - ENDFOR - ENDFOR -2: // equal each - IntRes1 := 0 - FOR i := 0 to UpperBound - IntRes1[i] := BoolRes.word[i].bit[i] - ENDFOR -3: // equal ordered - IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) - FOR i := 0 to UpperBound - k := i - FOR j := 0 to UpperBound-i - IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] - k := k+1 - ENDFOR - ENDFOR -ESAC -// optionally negate results -bInvalid := 0 -FOR i := 0 to UpperBound - IF imm8[4] - IF imm8[5] // only negate valid - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI - IF bInvalid // invalid, don't negate - IntRes2[i] := IntRes1[i] - ELSE // valid, negate - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // negate all - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // don't negate - IntRes2[i] := IntRes1[i] - FI -ENDFOR -// output -dst := IntRes2[0] - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if "b" did not contain a null character and the resulting mask was zero, and 0 otherwise. - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -BoolRes := 0 -// compare all characters -aInvalid := 0 -bInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - FOR j := 0 to UpperBound - n := j*size - BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 - - // invalidate characters after EOS - IF a[m+size-1:m] == 0 - aInvalid := 1 - FI - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI - - // override comparisons for invalid characters - CASE (imm8[3:2]) OF - 0: // equal any - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 1: // ranges - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 2: // equal each - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - 3: // equal ordered - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 1 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - ESAC - ENDFOR -ENDFOR -// aggregate results -CASE (imm8[3:2]) OF -0: // equal any - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] - ENDFOR - ENDFOR -1: // ranges - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) - j += 2 - ENDFOR - ENDFOR -2: // equal each - IntRes1 := 0 - FOR i := 0 to UpperBound - IntRes1[i] := BoolRes.word[i].bit[i] - ENDFOR -3: // equal ordered - IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) - FOR i := 0 to UpperBound - k := i - FOR j := 0 to UpperBound-i - IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] - k := k+1 - ENDFOR - ENDFOR -ESAC -// optionally negate results -bInvalid := 0 -FOR i := 0 to UpperBound - IF imm8[4] - IF imm8[5] // only negate valid - IF b[n+size-1:n] == 0 - bInvalid := 1 - FI - IF bInvalid // invalid, don't negate - IntRes2[i] := IntRes1[i] - ELSE // valid, negate - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // negate all - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // don't negate - IntRes2[i] := IntRes1[i] - FI -ENDFOR -// output -dst := (IntRes2 == 0) AND bInvalid - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - - - Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and store the generated mask in "dst". - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -BoolRes := 0 -// compare all characters -aInvalid := 0 -bInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - FOR j := 0 to UpperBound - n := j*size - BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 - - // invalidate characters after EOS - IF i == la - aInvalid := 1 - FI - IF j == lb - bInvalid := 1 - FI - - // override comparisons for invalid characters - CASE (imm8[3:2]) OF - 0: // equal any - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 1: // ranges - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 2: // equal each - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - 3: // equal ordered - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 1 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - ESAC - ENDFOR -ENDFOR -// aggregate results -CASE (imm8[3:2]) OF -0: // equal any - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] - ENDFOR - ENDFOR -1: // ranges - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) - j += 2 - ENDFOR - ENDFOR -2: // equal each - IntRes1 := 0 - FOR i := 0 to UpperBound - IntRes1[i] := BoolRes.word[i].bit[i] - ENDFOR -3: // equal ordered - IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) - FOR i := 0 to UpperBound - k := i - FOR j := 0 to UpperBound-i - IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] - k := k+1 - ENDFOR - ENDFOR -ESAC -// optionally negate results -FOR i := 0 to UpperBound - IF imm8[4] - IF imm8[5] // only negate valid - IF i >= lb // invalid, don't negate - IntRes2[i] := IntRes1[i] - ELSE // valid, negate - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // negate all - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // don't negate - IntRes2[i] := IntRes1[i] - FI -ENDFOR -// output -IF imm8[6] // byte / word mask - FOR i := 0 to UpperBound - j := i*size - IF IntRes2[i] - dst[j+size-1:j] := (imm8[0] ? 0xFF : 0xFFFF) - ELSE - dst[j+size-1:j] := 0 - FI - ENDFOR -ELSE // bit mask - dst[UpperBound:0] := IntRes2[UpperBound:0] - dst[127:UpperBound+1] := 0 -FI - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - - - Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and store the generated index in "dst". - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -BoolRes := 0 -// compare all characters -aInvalid := 0 -bInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - FOR j := 0 to UpperBound - n := j*size - BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 - - // invalidate characters after EOS - IF i == la - aInvalid := 1 - FI - IF j == lb - bInvalid := 1 - FI - - // override comparisons for invalid characters - CASE (imm8[3:2]) OF - 0: // equal any - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 1: // ranges - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 2: // equal each - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - 3: // equal ordered - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 1 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - ESAC - ENDFOR -ENDFOR -// aggregate results -CASE (imm8[3:2]) OF -0: // equal any - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] - ENDFOR - ENDFOR -1: // ranges - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) - j += 2 - ENDFOR - ENDFOR -2: // equal each - IntRes1 := 0 - FOR i := 0 to UpperBound - IntRes1[i] := BoolRes.word[i].bit[i] - ENDFOR -3: // equal ordered - IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) - FOR i := 0 to UpperBound - k := i - FOR j := 0 to UpperBound-i - IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] - k := k+1 - ENDFOR - ENDFOR -ESAC -// optionally negate results -FOR i := 0 to UpperBound - IF imm8[4] - IF imm8[5] // only negate valid - IF i >= lb // invalid, don't negate - IntRes2[i] := IntRes1[i] - ELSE // valid, negate - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // negate all - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // don't negate - IntRes2[i] := IntRes1[i] - FI -ENDFOR -// output -IF imm8[6] // most significant bit - tmp := UpperBound - dst := tmp - DO WHILE ((tmp >= 0) AND IntRes2[tmp] == 0) - tmp := tmp - 1 - dst := tmp - OD -ELSE // least significant bit - tmp := 0 - dst := tmp - DO WHILE ((tmp <= UpperBound) AND IntRes2[tmp] == 0) - tmp := tmp + 1 - dst := tmp - OD -FI - - SSE4.2 - 
nmmintrin.h
- String Compare -
- - - - - - - - Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if any character in "b" was null, and 0 otherwise. - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -dst := (lb <= UpperBound) - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - - - Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if the resulting mask was non-zero, and 0 otherwise. - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -BoolRes := 0 -// compare all characters -aInvalid := 0 -bInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - FOR j := 0 to UpperBound - n := j*size - BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 - - // invalidate characters after EOS - IF i == la - aInvalid := 1 - FI - IF j == lb - bInvalid := 1 - FI - - // override comparisons for invalid characters - CASE (imm8[3:2]) OF - 0: // equal any - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 1: // ranges - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 2: // equal each - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - 3: // equal ordered - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 1 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - ESAC - ENDFOR -ENDFOR -// aggregate results -CASE (imm8[3:2]) OF -0: // equal any - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] - ENDFOR - ENDFOR -1: // ranges - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) - j += 2 - ENDFOR - ENDFOR -2: // equal each - IntRes1 := 0 - FOR i := 0 to UpperBound - IntRes1[i] := BoolRes.word[i].bit[i] - ENDFOR -3: // equal ordered - IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) - FOR i := 0 to UpperBound - k := i - FOR j := 0 to UpperBound-i - IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] - k := k+1 - ENDFOR - ENDFOR -ESAC -// optionally negate results -FOR i := 0 to UpperBound - IF imm8[4] - IF imm8[5] // only negate valid - IF i >= lb // invalid, don't negate - IntRes2[i] := IntRes1[i] - ELSE // valid, negate - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // negate all - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // don't negate - IntRes2[i] := IntRes1[i] - FI -ENDFOR -// output -dst := (IntRes2 != 0) - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - - - Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if any character in "a" was null, and 0 otherwise. - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -dst := (la <= UpperBound) - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - - - Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns bit 0 of the resulting bit mask. - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -BoolRes := 0 -// compare all characters -aInvalid := 0 -bInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - FOR j := 0 to UpperBound - n := j*size - BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 - - // invalidate characters after EOS - IF i == la - aInvalid := 1 - FI - IF j == lb - bInvalid := 1 - FI - - // override comparisons for invalid characters - CASE (imm8[3:2]) OF - 0: // equal any - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 1: // ranges - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 2: // equal each - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - 3: // equal ordered - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 1 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - ESAC - ENDFOR -ENDFOR -// aggregate results -CASE (imm8[3:2]) OF -0: // equal any - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] - ENDFOR - ENDFOR -1: // ranges - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) - j += 2 - ENDFOR - ENDFOR -2: // equal each - IntRes1 := 0 - FOR i := 0 to UpperBound - IntRes1[i] := BoolRes.word[i].bit[i] - ENDFOR -3: // equal ordered - IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) - FOR i := 0 to UpperBound - k := i - FOR j := 0 to UpperBound-i - IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] - k := k+1 - ENDFOR - ENDFOR -ESAC -// optionally negate results -FOR i := 0 to UpperBound - IF imm8[4] - IF imm8[5] // only negate valid - IF i >= lb // invalid, don't negate - IntRes2[i] := IntRes1[i] - ELSE // valid, negate - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // negate all - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // don't negate - IntRes2[i] := IntRes1[i] - FI -ENDFOR -// output -dst := IntRes2[0] - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - - - - Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if "b" did not contain a null character and the resulting mask was zero, and 0 otherwise. - [strcmp_note] - -size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters -UpperBound := (128 / size) - 1 -BoolRes := 0 -// compare all characters -aInvalid := 0 -bInvalid := 0 -FOR i := 0 to UpperBound - m := i*size - FOR j := 0 to UpperBound - n := j*size - BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0 - - // invalidate characters after EOS - IF i == la - aInvalid := 1 - FI - IF j == lb - bInvalid := 1 - FI - - // override comparisons for invalid characters - CASE (imm8[3:2]) OF - 0: // equal any - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 1: // ranges - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - FI - 2: // equal each - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - 3: // equal ordered - IF (!aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 0 - ELSE IF (aInvalid && !bInvalid) - BoolRes.word[i].bit[j] := 1 - ELSE IF (aInvalid && bInvalid) - BoolRes.word[i].bit[j] := 1 - FI - ESAC - ENDFOR -ENDFOR -// aggregate results -CASE (imm8[3:2]) OF -0: // equal any - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j] - ENDFOR - ENDFOR -1: // ranges - IntRes1 := 0 - FOR i := 0 to UpperBound - FOR j := 0 to UpperBound - IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1]) - j += 2 - ENDFOR - ENDFOR -2: // equal each - IntRes1 := 0 - FOR i := 0 to UpperBound - IntRes1[i] := BoolRes.word[i].bit[i] - ENDFOR -3: // equal ordered - IntRes1 := (imm8[0] ? 0xFF : 0xFFFF) - FOR i := 0 to UpperBound - k := i - FOR j := 0 to UpperBound-i - IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j] - k := k+1 - ENDFOR - ENDFOR -ESAC -// optionally negate results -FOR i := 0 to UpperBound - IF imm8[4] - IF imm8[5] // only negate valid - IF i >= lb // invalid, don't negate - IntRes2[i] := IntRes1[i] - ELSE // valid, negate - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // negate all - IntRes2[i] := -1 XOR IntRes1[i] - FI - ELSE // don't negate - IntRes2[i] := IntRes1[i] - FI -ENDFOR -// output -dst := (IntRes2 == 0) AND (lb > UpperBound) - - - SSE4.2 -
nmmintrin.h
- String Compare -
- - - - - Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in "dst". - -FOR j := 0 to 1 - i := j*64 - dst[i+63:i] := ( a[i+63:i] > b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0 -ENDFOR - - - SSE4.2 -
nmmintrin.h
- Compare -
- - - - - Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 8-bit integer "v", and stores the result in "dst". - tmp1[7:0] := v[0:7] // bit reflection -tmp2[31:0] := crc[0:31] // bit reflection -tmp3[39:0] := tmp1[7:0] << 32 -tmp4[39:0] := tmp2[31:0] << 8 -tmp5[39:0] := tmp3[39:0] XOR tmp4[39:0] -tmp6[31:0] := MOD2(tmp5[39:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 -dst[31:0] := tmp6[0:31] // bit reflection - - - SSE4.2 -
nmmintrin.h
- Cryptography -
- - - - - Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 16-bit integer "v", and stores the result in "dst". - tmp1[15:0] := v[0:15] // bit reflection -tmp2[31:0] := crc[0:31] // bit reflection -tmp3[47:0] := tmp1[15:0] << 32 -tmp4[47:0] := tmp2[31:0] << 16 -tmp5[47:0] := tmp3[47:0] XOR tmp4[47:0] -tmp6[31:0] := MOD2(tmp5[47:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 -dst[31:0] := tmp6[0:31] // bit reflection - - - SSE4.2 -
nmmintrin.h
- Cryptography -
- - - - - Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 32-bit integer "v", and stores the result in "dst". - tmp1[31:0] := v[0:31] // bit reflection -tmp2[31:0] := crc[0:31] // bit reflection -tmp3[63:0] := tmp1[31:0] << 32 -tmp4[63:0] := tmp2[31:0] << 32 -tmp5[63:0] := tmp3[63:0] XOR tmp4[63:0] -tmp6[31:0] := MOD2(tmp5[63:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 -dst[31:0] := tmp6[0:31] // bit reflection - - - SSE4.2 -
nmmintrin.h
- Cryptography -
- - - - - Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 64-bit integer "v", and stores the result in "dst". - tmp1[63:0] := v[0:63] // bit reflection -tmp2[31:0] := crc[0:31] // bit reflection -tmp3[95:0] := tmp1[31:0] << 32 -tmp4[95:0] := tmp2[63:0] << 64 -tmp5[95:0] := tmp3[95:0] XOR tmp4[95:0] -tmp6[31:0] := MOD2(tmp5[95:0], 0x11EDC6F41) // remainder from polynomial division modulus 2 -dst[31:0] := tmp6[0:31] // bit reflection - - - SSE4.2 -
nmmintrin.h
- Cryptography -
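The CRC32 accumulators chain naturally over a buffer. A sketch computing CRC-32C with the conventional initial value and final inversion (the byte-at-a-time loop is for clarity; the wider variants would be used in practice):

```c
#include <nmmintrin.h> // SSE4.2
#include <stdio.h>
#include <string.h>

// CRC-32C (Castagnoli) over a byte buffer, one byte at a time.
static unsigned crc32c(const void *buf, size_t len) {
    const unsigned char *p = (const unsigned char *)buf;
    unsigned crc = 0xFFFFFFFFu;            // conventional initial value
    for (size_t i = 0; i < len; i++)
        crc = _mm_crc32_u8(crc, p[i]);
    return crc ^ 0xFFFFFFFFu;              // conventional final XOR
}

int main(void) {
    const char *msg = "123456789";
    printf("%08x\n", crc32c(msg, strlen(msg))); // e3069283, the CRC-32C check value
    return 0;
}
```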
- - - - - - Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 7 - i := j*8 - dst[i+7:i] := ABS(Int(a[i+7:i])) -ENDFOR - - - SSSE3 -
tmmintrin.h
- Special Math Functions -
- - - - Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 15 - i := j*8 - dst[i+7:i] := ABS(a[i+7:i]) -ENDFOR - - - SSSE3 -
tmmintrin.h
- Special Math Functions -
- - - - Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := ABS(Int(a[i+15:i])) -ENDFOR - - - SSSE3 -
tmmintrin.h
- Special Math Functions -
- - - - Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := ABS(a[i+15:i]) -ENDFOR - - - SSSE3 -
tmmintrin.h
- Special Math Functions -
- - - - Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 1 - i := j*32 - dst[i+31:i] := ABS(a[i+31:i]) -ENDFOR - - - SSSE3 -
tmmintrin.h
- Special Math Functions -
- - - - Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst". - -FOR j := 0 to 3 - i := j*32 - dst[i+31:i] := ABS(a[i+31:i]) -ENDFOR - - - SSSE3 -
tmmintrin.h
- Special Math Functions -
- - - - - Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". - -FOR j := 0 to 15 - i := j*8 - IF b[i+7] == 1 - dst[i+7:i] := 0 - ELSE - index[3:0] := b[i+3:i] - dst[i+7:i] := a[index*8+7:index*8] - FI -ENDFOR - - - SSSE3 -
tmmintrin.h
- Swizzle -
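The byte shuffle is a general table lookup: each control byte indexes into "a", or zeroes the lane if its top bit is set. A sketch reversing byte order (assuming `_mm_shuffle_epi8` from `tmmintrin.h`):

```c
#include <tmmintrin.h> // SSSE3
#include <stdio.h>

int main(void) {
    __m128i v = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                              8, 9, 10, 11, 12, 13, 14, 15);
    // Control bytes 15..0 pick source bytes in reverse order.
    __m128i ctl = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8,
                                7, 6, 5, 4, 3, 2, 1, 0);
    __m128i r = _mm_shuffle_epi8(v, ctl);
    unsigned char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    for (int i = 0; i < 16; i++)
        printf("%d ", out[i]); // 15 14 ... 1 0
    printf("\n");
    return 0;
}
```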
- - - - - Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst". - -FOR j := 0 to 7 - i := j*8 - IF b[i+7] == 1 - dst[i+7:i] := 0 - ELSE - index[2:0] := b[i+2:i] - dst[i+7:i] := a[index*8+7:index*8] - FI -ENDFOR - - - SSSE3 -
tmmintrin.h
- Swizzle -
- - - - - - Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst". - -tmp[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) -dst[127:0] := tmp[127:0] - - - SSSE3 -
tmmintrin.h
- Miscellaneous -
- - - - - - Concatenate 8-byte blocks in "a" and "b" into a 16-byte temporary result, shift the result right by "imm8" bytes, and store the low 8 bytes in "dst". - -tmp[127:0] := ((a[63:0] << 64)[127:0] OR b[63:0]) >> (imm8*8) -dst[63:0] := tmp[63:0] - - SSSE3 - 
tmmintrin.h
- Miscellaneous -
- - - - - Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". - -dst[15:0] := a[31:16] + a[15:0] -dst[31:16] := a[63:48] + a[47:32] -dst[47:32] := a[95:80] + a[79:64] -dst[63:48] := a[127:112] + a[111:96] -dst[79:64] := b[31:16] + b[15:0] -dst[95:80] := b[63:48] + b[47:32] -dst[111:96] := b[95:80] + b[79:64] -dst[127:112] := b[127:112] + b[111:96] - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". - -dst[15:0] := Saturate16(a[31:16] + a[15:0]) -dst[31:16] := Saturate16(a[63:48] + a[47:32]) -dst[47:32] := Saturate16(a[95:80] + a[79:64]) -dst[63:48] := Saturate16(a[127:112] + a[111:96]) -dst[79:64] := Saturate16(b[31:16] + b[15:0]) -dst[95:80] := Saturate16(b[63:48] + b[47:32]) -dst[111:96] := Saturate16(b[95:80] + b[79:64]) -dst[127:112] := Saturate16(b[127:112] + b[111:96]) - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". - -dst[31:0] := a[63:32] + a[31:0] -dst[63:32] := a[127:96] + a[95:64] -dst[95:64] := b[63:32] + b[31:0] -dst[127:96] := b[127:96] + b[95:64] - - - SSSE3 -
tmmintrin.h
- Arithmetic -
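A sketch of the 32-bit horizontal add (assuming `_mm_hadd_epi32`): pair sums of "a" fill the low half of the result and pair sums of "b" the high half:

```c
#include <tmmintrin.h> // SSSE3
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi32(1, 2, 3, 4);
    __m128i b = _mm_setr_epi32(10, 20, 30, 40);
    __m128i h = _mm_hadd_epi32(a, b); // {1+2, 3+4, 10+20, 30+40}
    int out[4];
    _mm_storeu_si128((__m128i *)out, h);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); // 3 7 30 70
    return 0;
}
```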
- - - - - Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". - -dst[15:0] := a[31:16] + a[15:0] -dst[31:16] := a[63:48] + a[47:32] -dst[47:32] := b[31:16] + b[15:0] -dst[63:48] := b[63:48] + b[47:32] - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". - -dst[31:0] := a[63:32] + a[31:0] -dst[63:32] := b[63:32] + b[31:0] - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". - -dst[15:0] := Saturate16(a[31:16] + a[15:0]) -dst[31:16] := Saturate16(a[63:48] + a[47:32]) -dst[47:32] := Saturate16(b[31:16] + b[15:0]) -dst[63:48] := Saturate16(b[63:48] + b[47:32]) - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". - -dst[15:0] := a[15:0] - a[31:16] -dst[31:16] := a[47:32] - a[63:48] -dst[47:32] := a[79:64] - a[95:80] -dst[63:48] := a[111:96] - a[127:112] -dst[79:64] := b[15:0] - b[31:16] -dst[95:80] := b[47:32] - b[63:48] -dst[111:96] := b[79:64] - b[95:80] -dst[127:112] := b[111:96] - b[127:112] - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". - -dst[15:0] := Saturate16(a[15:0] - a[31:16]) -dst[31:16] := Saturate16(a[47:32] - a[63:48]) -dst[47:32] := Saturate16(a[79:64] - a[95:80]) -dst[63:48] := Saturate16(a[111:96] - a[127:112]) -dst[79:64] := Saturate16(b[15:0] - b[31:16]) -dst[95:80] := Saturate16(b[47:32] - b[63:48]) -dst[111:96] := Saturate16(b[79:64] - b[95:80]) -dst[127:112] := Saturate16(b[111:96] - b[127:112]) - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". - -dst[31:0] := a[31:0] - a[63:32] -dst[63:32] := a[95:64] - a[127:96] -dst[95:64] := b[31:0] - b[63:32] -dst[127:96] := b[95:64] - b[127:96] - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst". - -dst[15:0] := a[15:0] - a[31:16] -dst[31:16] := a[47:32] - a[63:48] -dst[47:32] := b[15:0] - b[31:16] -dst[63:48] := b[47:32] - b[63:48] - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst". - -dst[31:0] := a[31:0] - a[63:32] -dst[63:32] := b[31:0] - b[63:32] - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst". - -dst[15:0] := Saturate16(a[15:0] - a[31:16]) -dst[31:16] := Saturate16(a[47:32] - a[63:48]) -dst[47:32] := Saturate16(b[15:0] - b[31:16]) -dst[63:48] := Saturate16(b[47:32] - b[63:48]) - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". - -FOR j := 0 to 7 - i := j*16 - dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) -ENDFOR - - - SSSE3 -
tmmintrin.h
- Arithmetic -
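The unsigned-by-signed multiply-add saturates: two 255*127 products exceed the signed 16-bit range. A sketch (assuming `_mm_maddubs_epi16`):

```c
#include <tmmintrin.h> // SSSE3
#include <stdio.h>

int main(void) {
    // a is read as unsigned bytes (-1 here is the byte 0xFF, i.e. 255);
    // b is read as signed bytes.
    __m128i a = _mm_setr_epi8(1, 2, 3, 4, -1, -1, 0, 0,
                              0, 0, 0, 0, 0, 0, 0, 0);
    __m128i b = _mm_setr_epi8(10, 10, -1, 1, 127, 127, 0, 0,
                              0, 0, 0, 0, 0, 0, 0, 0);
    __m128i r = _mm_maddubs_epi16(a, b);
    short out[8];
    _mm_storeu_si128((__m128i *)out, r);
    // 1*10+2*10 = 30; 3*(-1)+4*1 = 1; 255*127+255*127 = 64770 saturates to 32767
    printf("%d %d %d\n", out[0], out[1], out[2]);
    return 0;
}
```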
- - - - - Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst". - -FOR j := 0 to 3 - i := j*16 - dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] ) -ENDFOR - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". - -FOR j := 0 to 7 - i := j*16 - tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 - dst[i+15:i] := tmp[16:1] -ENDFOR - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". - -FOR j := 0 to 3 - i := j*16 - tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) >> 14) + 1 - dst[i+15:i] := tmp[16:1] -ENDFOR - - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. - -FOR j := 0 to 15 - i := j*8 - IF b[i+7:i] < 0 - dst[i+7:i] := -(a[i+7:i]) - ELSE IF b[i+7:i] == 0 - dst[i+7:i] := 0 - ELSE - dst[i+7:i] := a[i+7:i] - FI -ENDFOR - - SSSE3 - 
tmmintrin.h
- Arithmetic -
- - - - - Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. -
FOR j := 0 to 7
	i := j*16
	IF b[i+15:i] < 0
		dst[i+15:i] := -(a[i+15:i])
	ELSE IF b[i+15:i] == 0
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := a[i+15:i]
	FI
ENDFOR
 - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. -
FOR j := 0 to 3
	i := j*32
	IF b[i+31:i] < 0
		dst[i+31:i] := -(a[i+31:i])
	ELSE IF b[i+31:i] == 0
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := a[i+31:i]
	FI
ENDFOR
 - - SSSE3 -
tmmintrin.h
- Arithmetic -
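A short Rust sketch of the three sign cases, assuming the 4-lane entry above is `_mm_sign_epi32`:

#[cfg(target_arch = "x86_64")]
fn sign_demo() {
    if is_x86_feature_detected!("ssse3") {
        use std::arch::x86_64::*;
        // SAFETY: guarded by the runtime SSSE3 check above.
        unsafe {
            let a = _mm_setr_epi32(5, 6, 7, 8);
            let b = _mm_setr_epi32(-1, 0, 3, -9);
            let r = _mm_sign_epi32(a, b);
            let mut out = [0i32; 4];
            _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
            // b < 0 negates, b == 0 zeroes, b > 0 passes the element through.
            assert_eq!(out, [-5, 0, 7, -8]);
        }
    }
}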
- - - - - Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. -
FOR j := 0 to 7
	i := j*8
	IF b[i+7:i] < 0
		dst[i+7:i] := -(a[i+7:i])
	ELSE IF b[i+7:i] == 0
		dst[i+7:i] := 0
	ELSE
		dst[i+7:i] := a[i+7:i]
	FI
ENDFOR
 - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. -
FOR j := 0 to 3
	i := j*16
	IF b[i+15:i] < 0
		dst[i+15:i] := -(a[i+15:i])
	ELSE IF b[i+15:i] == 0
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := a[i+15:i]
	FI
ENDFOR
 - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero. -
FOR j := 0 to 1
	i := j*32
	IF b[i+31:i] < 0
		dst[i+31:i] := -(a[i+31:i])
	ELSE IF b[i+31:i] == 0
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := a[i+31:i]
	FI
ENDFOR
 - - SSSE3 -
tmmintrin.h
- Arithmetic -
- - - - - - Copy the current 64-bit value of the processor's time-stamp counter into "dst". - dst[63:0] := TimeStampCounter - - - TSC -
immintrin.h
- General Support -
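This time-stamp counter read is presumably `__rdtsc` in immintrin.h; Rust's core::arch spells the same operation `_rdtsc`. A minimal sketch, treating the counter strictly as a hint:

#[cfg(target_arch = "x86_64")]
fn tsc_demo() {
    use std::arch::x86_64::_rdtsc;
    // SAFETY: RDTSC has no memory-safety preconditions; the counter may be
    // virtualized or may differ between cores, so use it only as a rough hint.
    let start = unsafe { _rdtsc() };
    let end = unsafe { _rdtsc() };
    println!("elapsed reference cycles (approx): {}", end.wrapping_sub(start));
}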
- - - - - Mark the start of a TSX (HLE/RTM) suspend load address tracking region. If this is used inside a transactional region, subsequent loads are not added to the read set of the transaction. If this is used inside a suspend load address tracking region, it will cause a transaction abort. If this is used outside of a transactional region, it behaves like a NOP. - TSXLDTRK -
immintrin.h
- Miscellaneous -
- - - Mark the end of a TSX (HLE/RTM) suspend load address tracking region. If this is used inside a suspend load address tracking region, it will end the suspend region and all following load addresses will be added to the transaction read set. If this is used inside an active transaction but not in a suspend region, it will cause a transaction abort. If this is used outside of a transactional region, it behaves like a NOP. - TSXLDTRK -
immintrin.h
- Miscellaneous -
- - - - - - Clear the user interrupt flag (UIF). - - UINTR -
immintrin.h
- General Support -
- - - - Send user interprocessor interrupts specified in unsigned 64-bit integer "__a". - - UINTR -
immintrin.h
- General Support -
- - - - Set the user interrupt flag (UIF). - UINTR -
immintrin.h
- General Support -
- - - - Store the current user interrupt flag (UIF) in unsigned 8-bit integer "dst". - - UINTR -
immintrin.h
- General Support -
- - - - - Reads the contents of a 64-bit MSR specified in "__A" into "dst". - DEST := MSR[__A] - - - USER_MSR -
x86gprintrin.h
- General Support -
- - - - - Writes the contents of "__B" into the 64-bit MSR specified in "__A". - MSR[__A] := __B - - - USER_MSR -
x86gprintrin.h
- General Support -
- - - - - Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". -
FOR j := 0 to 1
	i := j*128
	a[i+127:i] := ShiftRows(a[i+127:i])
	a[i+127:i] := SubBytes(a[i+127:i])
	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
ENDFOR
dst[MAX:256] := 0
 - - VAES - AVX512VL -
immintrin.h
- Cryptography -
- - - - - Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". -
FOR j := 0 to 1
	i := j*128
	a[i+127:i] := ShiftRows(a[i+127:i])
	a[i+127:i] := SubBytes(a[i+127:i])
	a[i+127:i] := MixColumns(a[i+127:i])
	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
ENDFOR
dst[MAX:256] := 0
 - - VAES - AVX512VL -
immintrin.h
- Cryptography -
- - - - - Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". - FOR j := 0 to 1 - i := j*128 - a[i+127:i] := InvShiftRows(a[i+127:i]) - a[i+127:i] := InvSubBytes(a[i+127:i]) - dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] -ENDFOR -dst[MAX:256] := 0 - - - VAES - AVX512VL -
immintrin.h
- Cryptography -
- - - - - Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst". - FOR j := 0 to 1 - i := j*128 - a[i+127:i] := InvShiftRows(a[i+127:i]) - a[i+127:i] := InvSubBytes(a[i+127:i]) - a[i+127:i] := InvMixColumns(a[i+127:i]) - dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i] -ENDFOR -dst[MAX:256] := 0 - - - VAES - AVX512VL -
immintrin.h
- Cryptography -
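These four 256-bit VAES entries presumably correspond to `_mm256_aesenclast_epi128`, `_mm256_aesenc_epi128`, `_mm256_aesdeclast_epi128` and `_mm256_aesdec_epi128` (assumed mapping); a sketch running one encrypt round plus the last round on two 128-bit states at once:

#[cfg(target_arch = "x86_64")]
fn vaes_demo() {
    if is_x86_feature_detected!("vaes") {
        use std::arch::x86_64::*;
        // SAFETY: guarded by the runtime VAES check above.
        unsafe {
            let state = _mm256_set1_epi8(0x11); // two identical AES states
            let round_key = _mm256_set1_epi8(0x22); // same round key in both lanes
            let enc = _mm256_aesenc_epi128(state, round_key);
            let last = _mm256_aesenclast_epi128(enc, round_key);
            let mut out = [0u8; 32];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, last);
            // Each 128-bit lane went through the same rounds, so they match.
            assert_eq!(out[..16], out[16..]);
        }
    }
}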
- - - - - - - - Carry-less multiplication of one quadword of 'b' by one quadword of 'c', storing the 128-bit result in 'dst'. The immediate 'Imm8' is used to determine which quadwords of 'b' and 'c' should be used. -
DEFINE PCLMUL128(X,Y) {
	FOR i := 0 to 63
		TMP[i] := X[ 0 ] and Y[ i ]
		FOR j := 1 to i
			TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ])
		ENDFOR
		DEST[ i ] := TMP[ i ]
	ENDFOR
	FOR i := 64 to 126
		TMP[i] := 0
		FOR j := i - 63 to 63
			TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ])
		ENDFOR
		DEST[ i ] := TMP[ i ]
	ENDFOR
	DEST[127] := 0
	RETURN DEST // 128b vector
}
FOR i := 0 to 1
	IF Imm8[0] == 0
		TEMP1 := b.m128[i].qword[0]
	ELSE
		TEMP1 := b.m128[i].qword[1]
	FI
	IF Imm8[4] == 0
		TEMP2 := c.m128[i].qword[0]
	ELSE
		TEMP2 := c.m128[i].qword[1]
	FI
	dst.m128[i] := PCLMUL128(TEMP1, TEMP2)
ENDFOR
dst[MAX:256] := 0
 - - VPCLMULQDQ - AVX512VL -
immintrin.h
- Application-Targeted -
- - - - - - - - Carry-less multiplication of one quadword of 'b' by one quadword of 'c', storing the 128-bit result in 'dst'. The immediate 'Imm8' is used to determine which quadwords of 'b' and 'c' should be used. -
DEFINE PCLMUL128(X,Y) {
	FOR i := 0 to 63
		TMP[i] := X[ 0 ] and Y[ i ]
		FOR j := 1 to i
			TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ])
		ENDFOR
		DEST[ i ] := TMP[ i ]
	ENDFOR
	FOR i := 64 to 126
		TMP[i] := 0
		FOR j := i - 63 to 63
			TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ])
		ENDFOR
		DEST[ i ] := TMP[ i ]
	ENDFOR
	DEST[127] := 0
	RETURN DEST // 128b vector
}
FOR i := 0 to 3
	IF Imm8[0] == 0
		TEMP1 := b.m128[i].qword[0]
	ELSE
		TEMP1 := b.m128[i].qword[1]
	FI
	IF Imm8[4] == 0
		TEMP2 := c.m128[i].qword[0]
	ELSE
		TEMP2 := c.m128[i].qword[1]
	FI
	dst.m128[i] := PCLMUL128(TEMP1, TEMP2)
ENDFOR
dst[MAX:512] := 0
 - - VPCLMULQDQ -
immintrin.h
- Application-Targeted -
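A sketch of the 256-bit carry-less multiply, assuming it maps to `_mm256_clmulepi64_epi128::<IMM8>` (Imm8 bit 0 selects the quadword of 'b', bit 4 the quadword of 'c', per the pseudocode above):

#[cfg(target_arch = "x86_64")]
fn clmul_demo() {
    if is_x86_feature_detected!("vpclmulqdq") {
        use std::arch::x86_64::*;
        // SAFETY: guarded by the runtime VPCLMULQDQ check above.
        unsafe {
            // In GF(2), 0b11 * 0b11 = 0b101: XOR replaces the carry of an
            // ordinary multiplication.
            let b = _mm256_set1_epi64x(0b11);
            let c = _mm256_set1_epi64x(0b11);
            let r = _mm256_clmulepi64_epi128::<0x00>(b, c);
            let mut out = [0u64; 4];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
            assert_eq!(out, [0b101, 0, 0b101, 0]);
        }
    }
}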
- - - - - - - Directs the processor to enter an implementation-dependent optimized state until the TSC reaches or exceeds the value specified in "counter". Bit 0 of "ctrl" selects between a lower power (cleared) or faster wakeup (set) optimized state. Returns the carry flag (CF). If the processor that executed a UMWAIT instruction wakes due to the expiration of the operating system time limit, the instruction sets RFLAGS.CF; otherwise, that flag is cleared. - WAITPKG -
immintrin.h
- Miscellaneous -
- - - - - Directs the processor to enter an implementation-dependent optimized state while monitoring a range of addresses. The instruction wakes up when the TSC reaches or exceeds the value specified in "counter" (if the monitoring hardware did not trigger beforehand). Bit 0 of "ctrl" selects between a lower power (cleared) or faster wakeup (set) optimized state. Returns the carry flag (CF). If the processor that executed a UMWAIT instruction wakes due to the expiration of the operating system time limit, the instruction sets RFLAGS.CF; otherwise, that flag is cleared. - WAITPKG -
immintrin.h
- Miscellaneous -
- - - - Sets up a linear address range to be monitored by hardware and activates the monitor. The address range should be of the write-back memory caching type. The address is contained in "a". - WAITPKG -
immintrin.h
- Miscellaneous -
- - - - - - Write back and do not flush internal caches. Initiate writing back of external caches without flushing them. - WBNOINVD -
immintrin.h
- Miscellaneous -
- - - - - - - Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsavec differs from xsave in that it uses compaction and that it may use init optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. - mask[62:0] := save_mask[62:0] AND XCR0[62:0] -FOR i := 0 to 62 - IF mask[i] - CASE (i) OF - 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] - 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] - DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] - ESAC - mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] - FI - i := i + 1 -ENDFOR - - - XSAVE - XSAVEC -
immintrin.h
- OS-Targeted -
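A hedged Rust sketch of the compacted save, assuming this entry is `_xsavec`; the 4096-byte area is an assumption, and real code should size it from CPUID leaf 0Dh:

#[cfg(target_arch = "x86_64")]
fn xsavec_demo() {
    if is_x86_feature_detected!("xsave") && is_x86_feature_detected!("xsavec") {
        use std::arch::x86_64::_xsavec;
        #[repr(align(64))] // "mem_addr" must be 64-byte aligned
        struct XsaveArea([u8; 4096]);
        let mut area = XsaveArea([0; 4096]);
        // SAFETY: the buffer is 64-byte aligned and comfortably larger than
        // the x87 and SSE components requested by mask bits 0 and 1.
        unsafe { _xsavec(area.0.as_mut_ptr(), 0b11) };
    }
}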
- - - - - Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsavec differs from xsave in that it uses compaction and that it may use init optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. - mask[62:0] := save_mask[62:0] AND XCR0[62:0] -FOR i := 0 to 62 - IF mask[i] - CASE (i) OF - 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] - 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] - DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] - ESAC - mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] - FI - i := i + 1 -ENDFOR - - - XSAVE - XSAVEC -
immintrin.h
- OS-Targeted -
- - - - - - - Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. The hardware may optimize the manner in which data is saved. The performance of this instruction will be equal to or better than using the XSAVE instruction. - mask[62:0] := save_mask[62:0] AND XCR0[62:0] -FOR i := 0 to 62 - IF mask[i] - CASE (i) OF - 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] - 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] - 2: mem_addr.EXT_SAVE_Area2[YMM] := ProcessorState[YMM] - DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] - ESAC - mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] - FI - i := i + 1 -ENDFOR - - - XSAVE - XSAVEOPT -
immintrin.h
- OS-Targeted -
- - - - - Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. The hardware may optimize the manner in which data is saved. The performance of this instruction will be equal to or better than using the XSAVE64 instruction. - mask[62:0] := save_mask[62:0] AND XCR0[62:0] -FOR i := 0 to 62 - IF mask[i] - CASE (i) OF - 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] - 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] - 2: mem_addr.EXT_SAVE_Area2[YMM] := ProcessorState[YMM] - DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] - ESAC - mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] - FI - i := i + 1 -ENDFOR - - - XSAVE - XSAVEOPT -
immintrin.h
- OS-Targeted -
- - - - - - - Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsaves differs from xsave in that it can save state components corresponding to bits set in IA32_XSS MSR and that it may use the modified optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. - mask[62:0] := save_mask[62:0] AND XCR0[62:0] -FOR i := 0 to 62 - IF mask[i] - CASE (i) OF - 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] - 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] - DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] - ESAC - mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] - FI - i := i + 1 -ENDFOR - - - XSAVE - XSS -
immintrin.h
- OS-Targeted -
- - - - - Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsaves differs from xsave in that it can save state components corresponding to bits set in IA32_XSS MSR and that it may use the modified optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. - mask[62:0] := save_mask[62:0] AND XCR0[62:0] -FOR i := 0 to 62 - IF mask[i] - CASE (i) OF - 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] - 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] - DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] - ESAC - mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] - FI - i := i + 1 -ENDFOR - - - XSAVE - XSS -
immintrin.h
- OS-Targeted -
- - - - - Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". xrstors differs from xrstor in that it can restore state components corresponding to bits set in the IA32_XSS MSR; xrstors cannot restore from an xsave area in which the extended region is in the standard form. State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. - st_mask := mem_addr.HEADER.XSTATE_BV[62:0] -FOR i := 0 to 62 - IF (rs_mask[i] AND XCR0[i]) - IF st_mask[i] - CASE (i) OF - 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] - 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] - DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] - ESAC - ELSE - // ProcessorExtendedState := Processor Supplied Values - CASE (i) OF - 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] - ESAC - FI - FI - i := i + 1 -ENDFOR - - - XSAVE - XSS -
immintrin.h
- OS-Targeted -
- - - - - Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". xrstors differs from xrstor in that it can restore state components corresponding to bits set in the IA32_XSS MSR; xrstors cannot restore from an xsave area in which the extended region is in the standard form. State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. - st_mask := mem_addr.HEADER.XSTATE_BV[62:0] -FOR i := 0 to 62 - IF (rs_mask[i] AND XCR0[i]) - IF st_mask[i] - CASE (i) OF - 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] - 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] - DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] - ESAC - ELSE - // ProcessorExtendedState := Processor Supplied Values - CASE (i) OF - 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] - ESAC - FI - FI - i := i + 1 -ENDFOR - - - XSAVE - XSS -
immintrin.h
- OS-Targeted -
- - - - - - Copy up to 64 bits from the value of the extended control register (XCR) specified by "a" into "dst". Currently only the XFEATURE_ENABLED_MASK XCR is supported. -
dst[63:0] := XCR[a]
 - - XSAVE -
immintrin.h
- OS-Targeted -
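The XCR read above is presumably `_xgetbv`, which Rust exposes under the same name; a short sketch of the common use, checking that the OS has enabled AVX state in XCR0:

#[cfg(target_arch = "x86_64")]
fn xgetbv_demo() {
    use std::arch::x86_64::{_XCR_XFEATURE_ENABLED_MASK, _xgetbv};
    // SAFETY: reading XCR0 is allowed in user mode when CPUID reports OSXSAVE;
    // a complete implementation would check that CPUID bit first.
    let xcr0 = unsafe { _xgetbv(_XCR_XFEATURE_ENABLED_MASK) };
    let avx_enabled = xcr0 & 0b110 == 0b110; // bit 1: SSE state, bit 2: AVX state
    println!("OS-enabled AVX state: {avx_enabled}");
}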
- - - - - Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. - st_mask := mem_addr.HEADER.XSTATE_BV[62:0] -FOR i := 0 to 62 - IF (rs_mask[i] AND XCR0[i]) - IF st_mask[i] - CASE (i) OF - 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] - 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] - DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] - ESAC - ELSE - // ProcessorExtendedState := Processor Supplied Values - CASE (i) OF - 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] - ESAC - FI - FI - i := i + 1 -ENDFOR - - - XSAVE -
immintrin.h
- OS-Targeted -
- - - - - Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary. - st_mask := mem_addr.HEADER.XSTATE_BV[62:0] -FOR i := 0 to 62 - IF (rs_mask[i] AND XCR0[i]) - IF st_mask[i] - CASE (i) OF - 0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU] - 1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE] - DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i] - ESAC - ELSE - // ProcessorExtendedState := Processor Supplied Values - CASE (i) OF - 1: MXCSR := mem_addr.FPUSSESave_Area[SSE] - ESAC - FI - FI - i := i + 1 -ENDFOR - - - XSAVE -
immintrin.h
- OS-Targeted -
- - - - - Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. - mask[62:0] := save_mask[62:0] AND XCR0[62:0] -FOR i := 0 to 62 - IF mask[i] - CASE (i) OF - 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] - 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] - DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] - ESAC - mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] - FI - i := i + 1 -ENDFOR - - - XSAVE -
immintrin.h
- OS-Targeted -
- - - - - Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. - mask[62:0] := save_mask[62:0] AND XCR0[62:0] -FOR i := 0 to 62 - IF mask[i] - CASE (i) OF - 0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU] - 1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE] - DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i] - ESAC - mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i] - FI - i := i + 1 -ENDFOR - - - XSAVE -
immintrin.h
- OS-Targeted -
- - - - - Copy 64 bits from "val" to the extended control register (XCR) specified by "a". Currently only the XFEATURE_ENABLED_MASK XCR is supported. -
XCR[a] := val[63:0]
 - - XSAVE -
immintrin.h
- OS-Targeted -
- - -
\ No newline at end of file From 3f4f522850a721c103420a90f3c6bed8c22059c5 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Wed, 15 Oct 2025 14:33:21 +0530 Subject: [PATCH 219/358] chore: move from random testing to testing only the first N intrinsics --- stdarch/Cargo.lock | 77 ++------------------ stdarch/crates/intrinsic-test/Cargo.toml | 1 - stdarch/crates/intrinsic-test/src/x86/mod.rs | 10 +-- 3 files changed, 9 insertions(+), 79 deletions(-) diff --git a/stdarch/Cargo.lock b/stdarch/Cargo.lock index e198e14ffe178..70f09adf2c857 100644 --- a/stdarch/Cargo.lock +++ b/stdarch/Cargo.lock @@ -282,18 +282,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "getrandom" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasip2", -] - [[package]] name = "hashbrown" version = "0.12.3" @@ -360,7 +348,6 @@ dependencies = [ "log", "pretty_env_logger", "quick-xml 0.37.5", - "rand 0.9.2", "rayon", "regex", "serde", @@ -486,7 +473,7 @@ checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" dependencies = [ "env_logger 0.8.4", "log", - "rand 0.8.5", + "rand", ] [[package]] @@ -498,12 +485,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - [[package]] name = "rand" version = "0.8.5" @@ -511,18 +492,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" -dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_chacha", + "rand_core", ] [[package]] @@ -532,17 +503,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core 0.9.3", + "rand_core", ] [[package]] @@ -551,16 +512,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", -] - -[[package]] -name = "rand_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" -dependencies = [ - "getrandom 0.3.4", + "getrandom", ] [[package]] @@ -751,7 +703,7 @@ dependencies = [ name = "stdarch-gen-loongarch" version = "0.1.0" dependencies = [ - "rand 0.8.5", + "rand", ] [[package]] @@ -784,7 +736,7 @@ version = "0.0.0" dependencies = [ "core_arch", "quickcheck", - "rand 0.8.5", + "rand", ] [[package]] @@ -867,15 +819,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasip2" -version = "1.0.1+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" -dependencies = [ - "wit-bindgen", -] - [[package]] name = "wasmparser" version = "0.235.0" @@ -1060,12 +1003,6 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" -[[package]] -name = "wit-bindgen" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" - [[package]] name = "xml-rs" version = "0.8.27" diff --git a/stdarch/crates/intrinsic-test/Cargo.toml b/stdarch/crates/intrinsic-test/Cargo.toml index 9fb70f32f81bc..2c0f53897e797 100644 --- a/stdarch/crates/intrinsic-test/Cargo.toml +++ b/stdarch/crates/intrinsic-test/Cargo.toml @@ -22,4 +22,3 @@ itertools = "0.14.0" quick-xml = { version = "0.37.5", features = ["serialize", "overlapped-lists"] } serde-xml-rs = "0.8.0" regex = "1.11.1" -rand = "0.9.2" diff --git a/stdarch/crates/intrinsic-test/src/x86/mod.rs b/stdarch/crates/intrinsic-test/src/x86/mod.rs index ca5748e5fb0c5..a28c8647fe1a2 100644 --- a/stdarch/crates/intrinsic-test/src/x86/mod.rs +++ b/stdarch/crates/intrinsic-test/src/x86/mod.rs @@ -12,8 +12,6 @@ use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::X86IntrinsicType; use itertools::Itertools; -use rand::rng; -use rand::seq::IndexedRandom; use xml_parser::get_xml_intrinsics; pub struct X86ArchitectureTest { @@ -49,10 +47,9 @@ impl SupportedArchitectureTest for X86ArchitectureTest { let intrinsics = get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file"); - let mut rng = rng(); let sample_percentage: usize = cli_options.sample_percentage as usize; - let intrinsics = intrinsics + let mut intrinsics = intrinsics .into_iter() // Not sure how we would compare intrinsic that returns void. .filter(|i| i.results.kind() != TypeKind::Void) @@ -68,10 +65,7 @@ impl SupportedArchitectureTest for X86ArchitectureTest { .collect::>(); let sample_size = (intrinsics.len() * sample_percentage) / 100; - let mut intrinsics = intrinsics - .choose_multiple(&mut rng, sample_size) - .cloned() - .collect::>(); + intrinsics.truncate(sample_size); intrinsics.sort_by(|a, b| a.name.cmp(&b.name)); Self { From 687320f48439b39f17d62c7c61e83519c693e3e9 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Wed, 15 Oct 2025 15:04:04 +0530 Subject: [PATCH 220/358] chore: convert println! logging to trace! 
logging during compilation step --- stdarch/crates/intrinsic-test/src/common/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/common/mod.rs b/stdarch/crates/intrinsic-test/src/common/mod.rs index 86a7876807fd6..d8f06ae23885e 100644 --- a/stdarch/crates/intrinsic-test/src/common/mod.rs +++ b/stdarch/crates/intrinsic-test/src/common/mod.rs @@ -76,12 +76,12 @@ pub trait SupportedArchitectureTest { // // This is done because `cpp_compiler_wrapped` is None when // the --generate-only flag is passed - println!("compiling mod_{i}.cpp"); + trace!("compiling mod_{i}.cpp"); if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { let compile_output = cpp_compiler .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o")); - println!("finished compiling mod_{i}.cpp"); + trace!("finished compiling mod_{i}.cpp"); if let Err(compile_error) = compile_output { return Err(format!("Error compiling mod_{i}.cpp: {compile_error:?}")); } @@ -104,7 +104,7 @@ pub trait SupportedArchitectureTest { // the --generate-only flag is passed if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { // compile this cpp file into a .o file - info!("compiling main.cpp"); + trace!("compiling main.cpp"); let output = cpp_compiler .compile_object_file("main.cpp", "intrinsic-test-programs.o") .unwrap(); From 765278d8b5d3d0acd0ea0c131a232277e4b2be72 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Wed, 15 Oct 2025 20:54:11 +0530 Subject: [PATCH 221/358] feat: code cleanup 1. changing array bracket prefixes from &'static str to char 2. including variable names in template strings instead of passing them as arguments to macros --- .../crates/intrinsic-test/src/arm/types.rs | 14 +++++----- .../intrinsic-test/src/common/argument.rs | 2 +- .../src/common/intrinsic_helpers.rs | 28 +++++++++---------- .../crates/intrinsic-test/src/x86/config.rs | 2 +- .../crates/intrinsic-test/src/x86/types.rs | 4 +-- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/arm/types.rs b/stdarch/crates/intrinsic-test/src/arm/types.rs index c798cbe42d03f..4be8d1e48b49a 100644 --- a/stdarch/crates/intrinsic-test/src/arm/types.rs +++ b/stdarch/crates/intrinsic-test/src/arm/types.rs @@ -14,10 +14,10 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { (None, None) => format!("{const_prefix}{prefix}{bit_len}_t"), (Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"), (Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"), - (None, Some(_)) => todo!("{:#?}", self), // Likely an invalid case + (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case } } else { - todo!("{:#?}", self) + todo!("{self:#?}") } } @@ -58,14 +58,14 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { // The ACLE doesn't support 64-bit polynomial loads on Armv7 // if armv7 and bl == 64, use "s", else "p" TypeKind::Poly => if choose_workaround && *bl == 64 {"s"} else {"p"}, - x => todo!("get_load_function TypeKind: {:#?}", x), + x => todo!("get_load_function TypeKind: {x:#?}"), }, size = bl, quad = quad, len = vec_len.unwrap_or(1), ) } else { - todo!("get_load_function IntrinsicType: {:#?}", self) + todo!("get_load_function IntrinsicType: {self:#?}") } } @@ -90,13 +90,13 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { TypeKind::Int(Sign::Signed) => "s", TypeKind::Float => "f", TypeKind::Poly => "p", - x => todo!("get_load_function TypeKind: {:#?}", x), + x => todo!("get_load_function TypeKind: {x:#?}"), }, size = bl, 
quad = quad, ) } else { - todo!("get_lane_function IntrinsicType: {:#?}", self) + todo!("get_lane_function IntrinsicType: {self:#?}") } } @@ -143,7 +143,7 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { TypeKind::Int(Sign::Signed) => format!("int{}_t", self.inner_size()), TypeKind::Int(Sign::Unsigned) => format!("uint{}_t", self.inner_size()), TypeKind::Poly => format!("poly{}_t", self.inner_size()), - ty => todo!("print_result_c - Unknown type: {:#?}", ty), + ty => todo!("print_result_c - Unknown type: {ty:#?}"), }, promote = self.generate_final_type_cast(), ) diff --git a/stdarch/crates/intrinsic-test/src/common/argument.rs b/stdarch/crates/intrinsic-test/src/common/argument.rs index 5963abef2f952..5fb7d0f2109e5 100644 --- a/stdarch/crates/intrinsic-test/src/common/argument.rs +++ b/stdarch/crates/intrinsic-test/src/common/argument.rs @@ -31,7 +31,7 @@ where pub fn to_c_type(&self) -> String { let prefix = if self.ty.constant { "const " } else { "" }; - format!("{}{}", prefix, self.ty.c_type()) + format!("{prefix}{}", self.ty.c_type()) } pub fn generate_name(&self) -> String { diff --git a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs index aa8613206ea0b..c2d66868ceb49 100644 --- a/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -80,7 +80,7 @@ impl TypeKind { Self::Poly => "poly", Self::Char(Sign::Signed) => "char", Self::Vector => "int", - _ => unreachable!("Not used: {:#?}", self), + _ => unreachable!("Not used: {self:#?}"), } } @@ -94,7 +94,7 @@ impl TypeKind { Self::Poly => "u", Self::Char(Sign::Unsigned) => "u", Self::Char(Sign::Signed) => "i", - _ => unreachable!("Unused type kind: {:#?}", self), + _ => unreachable!("Unused type kind: {self:#?}"), } } } @@ -134,7 +134,7 @@ impl IntrinsicType { if let Some(bl) = self.bit_len { cmp::max(bl, 8) } else { - unreachable!("{:#?}", self) + unreachable!("{self:#?}") } } @@ -225,8 +225,8 @@ impl IntrinsicType { .. } => { let (prefix, suffix) = match language { - Language::Rust => ("[", "]"), - Language::C => ("{", "}"), + Language::Rust => ('[', ']'), + Language::C => ('{', '}'), }; let body_indentation = indentation.nested(); format!( @@ -262,12 +262,12 @@ impl IntrinsicType { .. } => { let (prefix, cast_prefix, cast_suffix, suffix) = match (language, bit_len) { - (&Language::Rust, 16) => ("[", "f16::from_bits(", ")", "]"), - (&Language::Rust, 32) => ("[", "f32::from_bits(", ")", "]"), - (&Language::Rust, 64) => ("[", "f64::from_bits(", ")", "]"), - (&Language::C, 16) => ("{", "cast(", ")", "}"), - (&Language::C, 32) => ("{", "cast(", ")", "}"), - (&Language::C, 64) => ("{", "cast(", ")", "}"), + (&Language::Rust, 16) => ('[', "f16::from_bits(", ")", ']'), + (&Language::Rust, 32) => ('[', "f32::from_bits(", ")", ']'), + (&Language::Rust, 64) => ('[', "f64::from_bits(", ")", ']'), + (&Language::C, 16) => ('{', "cast(", ")", '}'), + (&Language::C, 32) => ('{', "cast(", ")", '}'), + (&Language::C, 64) => ('{', "cast(", ")", '}'), _ => unreachable!(), }; format!( @@ -288,8 +288,8 @@ impl IntrinsicType { .. 
} => { let (prefix, suffix) = match language { - Language::Rust => ("[", "]"), - Language::C => ("{", "}"), + Language::Rust => ('[', ']'), + Language::C => ('{', '}'), }; let body_indentation = indentation.nested(); let effective_bit_len = 32; @@ -317,7 +317,7 @@ impl IntrinsicType { }) ) } - _ => unimplemented!("populate random: {:#?}", self), + _ => unimplemented!("populate random: {self:#?}"), } } diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index 6be3f1b133896..6d913acca7954 100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -213,7 +213,7 @@ trait DebugAs { impl DebugAs for T { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "{}", self) + write!(f, "{self}") } } diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index cdfc6bfa98279..be15b6dccdde7 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -219,11 +219,11 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { TypeKind::Float if self.inner_size() == 32 => "float".to_string(), TypeKind::Mask => format!( "__mmask{}", - self.bit_len.expect(format!("self: {:#?}", self).as_str()) + self.bit_len.expect(format!("self: {self:#?}").as_str()) ), TypeKind::Vector => format!( "__m{}i", - self.bit_len.expect(format!("self: {:#?}", self).as_str()) + self.bit_len.expect(format!("self: {self:#?}").as_str()) ), _ => self.c_scalar_type(), }, From 20cacb07f2b2d6ba5489a7cc9b63cdddd724eb64 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Thu, 16 Oct 2025 09:47:17 +0530 Subject: [PATCH 222/358] chore: make names in config.rs files uniform across architectures --- stdarch/crates/intrinsic-test/src/arm/config.rs | 8 ++++---- stdarch/crates/intrinsic-test/src/arm/mod.rs | 8 ++++---- stdarch/crates/intrinsic-test/src/x86/config.rs | 4 ++-- stdarch/crates/intrinsic-test/src/x86/mod.rs | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/arm/config.rs b/stdarch/crates/intrinsic-test/src/arm/config.rs index e2bc501127dd3..a63464596915d 100644 --- a/stdarch/crates/intrinsic-test/src/arm/config.rs +++ b/stdarch/crates/intrinsic-test/src/arm/config.rs @@ -3,7 +3,7 @@ pub const NOTICE: &str = "\ // test are derived from a JSON specification, published under the same license as the // `intrinsic-test` crate.\n"; -pub const POLY128_OSTREAM_DECL: &str = r#" +pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#" #ifdef __aarch64__ std::ostream& operator<<(std::ostream& os, poly128_t value); #endif @@ -20,7 +20,7 @@ template T1 cast(T2 x) { } "#; -pub const POLY128_OSTREAM_DEF: &str = r#" +pub const PLATFORM_C_DEFINITIONS: &str = r#" #ifdef __aarch64__ std::ostream& operator<<(std::ostream& os, poly128_t value) { std::stringstream temp; @@ -53,7 +53,7 @@ std::ostream& operator<<(std::ostream& os, uint8_t value) { "#; // Format f16 values (and vectors containing them) in a way that is consistent with C. -pub const F16_FORMATTING_DEF: &str = r#" +pub const PLATFORM_RUST_DEFINITIONS: &str = r#" /// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they /// were before moving to array-based simd. 
#[inline] @@ -139,7 +139,7 @@ impl DebugHexF16 for float16x8x4_t { } "#; -pub const AARCH_CONFIGURATIONS: &str = r#" +pub const PLATFORM_RUST_CFGS: &str = r#" #![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] #![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] diff --git a/stdarch/crates/intrinsic-test/src/arm/mod.rs b/stdarch/crates/intrinsic-test/src/arm/mod.rs index 08dc2d38702cd..7fa5062e86522 100644 --- a/stdarch/crates/intrinsic-test/src/arm/mod.rs +++ b/stdarch/crates/intrinsic-test/src/arm/mod.rs @@ -32,11 +32,11 @@ impl SupportedArchitectureTest for ArmArchitectureTest { const NOTICE: &str = config::NOTICE; const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; - const PLATFORM_C_DEFINITIONS: &str = config::POLY128_OSTREAM_DEF; - const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::POLY128_OSTREAM_DECL; + const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS; + const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS; - const PLATFORM_RUST_DEFINITIONS: &str = config::F16_FORMATTING_DEF; - const PLATFORM_RUST_CFGS: &str = config::AARCH_CONFIGURATIONS; + const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; + const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; fn cpp_compilation(&self) -> Option { compile::build_cpp_compilation(&self.cli_options) diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index 6d913acca7954..d7770189eb1ea 100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -4,7 +4,7 @@ pub const NOTICE: &str = "\ // `intrinsic-test` crate.\n"; // Format f16 values (and vectors containing them) in a way that is consistent with C. 
-pub const F16_FORMATTING_DEF: &str = r#" +pub const PLATFORM_RUST_DEFINITIONS: &str = r#" use std::arch::x86_64::*; #[inline] @@ -392,7 +392,7 @@ std::ostream& operator<<(std::ostream& os, __mmask8 value) { } "#; -pub const X86_CONFIGURATIONS: &str = r#" +pub const PLATFORM_RUST_CFGS: &str = r#" #![cfg_attr(target_arch = "x86", feature(avx))] #![cfg_attr(target_arch = "x86", feature(sse))] #![cfg_attr(target_arch = "x86", feature(sse2))] diff --git a/stdarch/crates/intrinsic-test/src/x86/mod.rs b/stdarch/crates/intrinsic-test/src/x86/mod.rs index a28c8647fe1a2..956e51836f3f7 100644 --- a/stdarch/crates/intrinsic-test/src/x86/mod.rs +++ b/stdarch/crates/intrinsic-test/src/x86/mod.rs @@ -40,8 +40,8 @@ impl SupportedArchitectureTest for X86ArchitectureTest { const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS; const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS; - const PLATFORM_RUST_DEFINITIONS: &str = config::F16_FORMATTING_DEF; - const PLATFORM_RUST_CFGS: &str = config::X86_CONFIGURATIONS; + const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; + const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; fn create(cli_options: ProcessedCli) -> Self { let intrinsics = From 414470c1d9d891f3dc0521910726407e7b07e21f Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Fri, 17 Oct 2025 22:26:59 +0530 Subject: [PATCH 223/358] fix: remove the PATH update in ci/run.sh --- stdarch/ci/run.sh | 2 -- stdarch/crates/intrinsic-test/src/common/compare.rs | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/stdarch/ci/run.sh b/stdarch/ci/run.sh index bd0e06687fa6a..48dfe2a77dcd2 100755 --- a/stdarch/ci/run.sh +++ b/stdarch/ci/run.sh @@ -96,8 +96,6 @@ case ${TARGET} in TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt TEST_SAMPLE_INTRINSICS_PERCENTAGE=5 export STDARCH_DISABLE_ASSERT_INSTR=1 - PATH="$PATH":"$(pwd)"/c_programs - export PATH export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx" cargo_test "${PROFILE}" diff --git a/stdarch/crates/intrinsic-test/src/common/compare.rs b/stdarch/crates/intrinsic-test/src/common/compare.rs index 89e5f965bc8e9..902df94283fd6 100644 --- a/stdarch/crates/intrinsic-test/src/common/compare.rs +++ b/stdarch/crates/intrinsic-test/src/common/compare.rs @@ -15,13 +15,13 @@ pub fn compare_outputs(intrinsic_name_list: &Vec, runner: &str, target: .par_iter() .filter_map(|intrinsic_name| { let c = runner_command(runner) - .arg("intrinsic-test-programs") + .arg("./intrinsic-test-programs") .arg(intrinsic_name) .current_dir("c_programs") .output(); let rust = runner_command(runner) - .arg(format!("target/{target}/release/intrinsic-test-programs")) + .arg(format!("./target/{target}/release/intrinsic-test-programs")) .arg(intrinsic_name) .current_dir("rust_programs") .output(); From 4b1299ebcaef35b65147a8074709f242a254d76b Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Thu, 23 Oct 2025 10:17:33 +0530 Subject: [PATCH 224/358] feat: fixing Rust's print mechanism for _mm512_conj_pch --- stdarch/crates/intrinsic-test/missing_x86.txt | 2 +- stdarch/crates/intrinsic-test/src/x86/config.rs | 3 +++ stdarch/crates/intrinsic-test/src/x86/types.rs | 6 ++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/stdarch/crates/intrinsic-test/missing_x86.txt b/stdarch/crates/intrinsic-test/missing_x86.txt index e546799740c73..58e37b92a1dbb 100644 --- a/stdarch/crates/intrinsic-test/missing_x86.txt +++ b/stdarch/crates/intrinsic-test/missing_x86.txt @@ -890,7 +890,7 @@ 
_mm256_extract_epi16 _mm256_extract_epi8 _mm512_castsi128_si512 _mm512_castsi256_si512 -_mm512_conj_pch +# _mm512_conj_pch _mm512_mask_reduce_max_pd _mm512_mask_reduce_max_ps _mm512_mask_reduce_min_pd diff --git a/stdarch/crates/intrinsic-test/src/x86/config.rs b/stdarch/crates/intrinsic-test/src/x86/config.rs index d7770189eb1ea..7c349e448206e 100644 --- a/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -235,6 +235,9 @@ macro_rules! impl_debug_as { impl_debug_as!(__m128i, "__m128i", 128, [u8, i8, u16, i16, u32, i32, u64, i64]); impl_debug_as!(__m256i, "__m256i", 256, [u8, i8, u16, i16, u32, i32, u64, i64]); impl_debug_as!(__m512i, "__m512i", 512, [u8, i8, u16, i16, u32, i32, u64, i64]); +impl_debug_as!(__m128h, "__m128h", 128, [f32]); +impl_debug_as!(__m256h, "__m256h", 256, [f32]); +impl_debug_as!(__m512h, "__m512h", 512, [f32]); fn debug_as(x: V) -> impl core::fmt::Debug where V: DebugAs diff --git a/stdarch/crates/intrinsic-test/src/x86/types.rs b/stdarch/crates/intrinsic-test/src/x86/types.rs index be15b6dccdde7..87932fcb3ec76 100644 --- a/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -290,6 +290,12 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { fn print_result_rust(&self) -> String { let return_value = match self.kind() { TypeKind::Float if self.inner_size() == 16 => "debug_f16(__return_value)".to_string(), + TypeKind::Float + if self.inner_size() == 32 + && ["__m512h"].contains(&self.param.type_data.as_str()) => + { + "debug_as::<_, f32>(__return_value)".to_string() + } TypeKind::Int(_) if ["__m128i", "__m256i", "__m512i"].contains(&self.param.type_data.as_str()) => { From 1a5c1178d6e97f17bf1a98fd71508c41ef75f5e7 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Sun, 26 Oct 2025 18:19:45 +0530 Subject: [PATCH 225/358] feat: added x86_64-unknown-linux-gnu to the test matrix of `intrinsic-test` --- stdarch/.github/workflows/main.yml | 1 + stdarch/ci/intrinsic-test.sh | 24 ++++++++++++++++++++++++ stdarch/ci/run.sh | 5 ----- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/stdarch/.github/workflows/main.yml b/stdarch/.github/workflows/main.yml index b852110a3258a..28c15cf4734ac 100644 --- a/stdarch/.github/workflows/main.yml +++ b/stdarch/.github/workflows/main.yml @@ -260,6 +260,7 @@ jobs: - aarch64_be-unknown-linux-gnu - armv7-unknown-linux-gnueabihf - arm-unknown-linux-gnueabihf + - x86_64-unknown-linux-gnu profile: [dev, release] include: - target: aarch64_be-unknown-linux-gnu diff --git a/stdarch/ci/intrinsic-test.sh b/stdarch/ci/intrinsic-test.sh index 469e9e21c74c7..e14a824b2ae66 100755 --- a/stdarch/ci/intrinsic-test.sh +++ b/stdarch/ci/intrinsic-test.sh @@ -66,6 +66,14 @@ case ${TARGET} in TEST_CXX_COMPILER="clang++" TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}" ;; + + x86_64-unknown-linux-gnu*) + TEST_CPPFLAGS="-fuse-ld=lld -I/usr/include/x86_64-linux-gnu/" + TEST_CXX_COMPILER="clang++" + TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt + TEST_SAMPLE_INTRINSICS_PERCENTAGE=5 + ;; *) ;; @@ -94,6 +102,22 @@ case "${TARGET}" in --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \ --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" ;; + + x86_64-unknown-linux-gnu*) + # `CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER` is not necessary for `intrinsic-test` + # because the binary needs to run directly on the host. + # Hence the use of `env -u`. 
+ env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \ + CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \ + RUST_LOG=warn RUST_BACKTRACE=1 \ + cargo run "${INTRINSIC_TEST}" "${PROFILE}" \ + --bin intrinsic-test -- intrinsics_data/x86-intel.xml \ + --runner "${TEST_RUNNER}" \ + --skip "${TEST_SKIP_INTRINSICS}" \ + --cppcompiler "${TEST_CXX_COMPILER}" \ + --target "${TARGET}" \ + --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" + ;; *) ;; esac diff --git a/stdarch/ci/run.sh b/stdarch/ci/run.sh index 48dfe2a77dcd2..2bb77bae256f1 100755 --- a/stdarch/ci/run.sh +++ b/stdarch/ci/run.sh @@ -90,11 +90,6 @@ fi # Test targets compiled with extra features. case ${TARGET} in x86_64-unknown-linux-gnu) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/include/x86_64-linux-gnu/" - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt - TEST_SAMPLE_INTRINSICS_PERCENTAGE=5 export STDARCH_DISABLE_ASSERT_INSTR=1 export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx" From ceaf2b030cfc3f03d378ed86a90bbd0598d6661d Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 25 Oct 2025 18:36:19 +0200 Subject: [PATCH 226/358] intrinsic test: deduplicate rust constants --- .../intrinsic-test/src/common/argument.rs | 42 ++++++++++++++----- .../intrinsic-test/src/common/gen_rust.rs | 17 +++++++- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/common/argument.rs b/stdarch/crates/intrinsic-test/src/common/argument.rs index 5fb7d0f2109e5..385cf32d3bff9 100644 --- a/stdarch/crates/intrinsic-test/src/common/argument.rs +++ b/stdarch/crates/intrinsic-test/src/common/argument.rs @@ -60,9 +60,15 @@ where } /// The name (e.g. "A_VALS" or "a_vals") for the array of possible test inputs. - fn rust_vals_array_name(&self) -> impl std::fmt::Display { + pub(crate) fn rust_vals_array_name(&self) -> impl std::fmt::Display { if self.ty.is_rust_vals_array_const() { - format!("{}_VALS", self.name.to_uppercase()) + let loads = crate::common::gen_rust::PASSES; + format!( + "{}_{ty}_{load_size}", + self.name.to_uppercase(), + ty = self.ty.rust_scalar_type(), + load_size = self.ty.num_lanes() * self.ty.num_vectors() + loads - 1, + ) } else { format!("{}_vals", self.name.to_lowercase()) } @@ -134,20 +140,34 @@ where loads: u32, ) -> std::io::Result<()> { for arg in self.iter().filter(|&arg| !arg.has_constraint()) { - writeln!( - w, - "{indentation}{bind} {name}: [{ty}; {load_size}] = {values};", - bind = arg.rust_vals_array_binding(), - name = arg.rust_vals_array_name(), - ty = arg.ty.rust_scalar_type(), - load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1, - values = arg.ty.populate_random(indentation, loads, &Language::Rust) - )? + // Constants are defined globally. 
+ if arg.ty.is_rust_vals_array_const() { + continue; + } + + Self::gen_arg_rust(arg, w, indentation, loads)?; } Ok(()) } + pub fn gen_arg_rust( + arg: &Argument, + w: &mut impl std::io::Write, + indentation: Indentation, + loads: u32, + ) -> std::io::Result<()> { + writeln!( + w, + "{indentation}{bind} {name}: [{ty}; {load_size}] = {values};\n", + bind = arg.rust_vals_array_binding(), + name = arg.rust_vals_array_name(), + ty = arg.ty.rust_scalar_type(), + load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1, + values = arg.ty.populate_random(indentation, loads, &Language::Rust) + ) + } + /// Creates a line for each argument that initializes the argument from an array `[arg]_vals` at /// an offset `i` using a load intrinsic, in C. /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);` diff --git a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs index 27f49a37b1cf6..00bcf04850cbc 100644 --- a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs +++ b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs @@ -1,13 +1,14 @@ use itertools::Itertools; use std::process::Command; +use crate::common::argument::ArgumentList; use crate::common::intrinsic::Intrinsic; use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; // The number of times each intrinsic will be called. -const PASSES: u32 = 20; +pub(crate) const PASSES: u32 = 20; fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { writeln!( @@ -118,6 +119,20 @@ pub fn write_lib_rs( writeln!(w, "{definitions}")?; + let mut seen = std::collections::HashSet::new(); + + for intrinsic in intrinsics { + for arg in &intrinsic.arguments.args { + if !arg.has_constraint() && arg.ty.is_rust_vals_array_const() { + let name = arg.rust_vals_array_name().to_string(); + + if seen.insert(name) { + ArgumentList::gen_arg_rust(arg, w, Indentation::default(), PASSES)?; + } + } + } + } + for intrinsic in intrinsics { crate::common::gen_rust::create_rust_test_module(w, intrinsic)?; } From 3034e65baaece98ea33a990ebb9c28ae0d97cd85 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 25 Oct 2025 18:50:02 +0200 Subject: [PATCH 227/358] intrinsic test: fix formatting (a bit, at least) --- .../intrinsic-test/src/common/gen_rust.rs | 48 +++++++++++-------- .../intrinsic-test/src/common/indentation.rs | 4 ++ 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs index 00bcf04850cbc..c8d815e46eae3 100644 --- a/stdarch/crates/intrinsic-test/src/common/gen_rust.rs +++ b/stdarch/crates/intrinsic-test/src/common/gen_rust.rs @@ -10,16 +10,22 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition; // The number of times each intrinsic will be called. pub(crate) const PASSES: u32 = 20; +macro_rules! concatln { + ($($lines:expr),* $(,)?) 
=> { + concat!($( $lines, "\n" ),*) + }; +} + fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { writeln!( w, - concat!( - "[package]\n", - "name = \"{name}\"\n", - "version = \"{version}\"\n", - "authors = [{authors}]\n", - "license = \"{license}\"\n", - "edition = \"2018\"\n", + concatln!( + "[package]", + "name = \"{name}\"", + "version = \"{version}\"", + "authors = [{authors}]", + "license = \"{license}\"", + "edition = \"2018\"", ), name = name, version = env!("CARGO_PKG_VERSION"), @@ -247,23 +253,23 @@ pub fn generate_rust_test_loop( } } - let indentation2 = indentation.nested(); - let indentation3 = indentation2.nested(); - writeln!( + write!( w, - "\ - for (id, f) in specializations {{\n\ - for i in 0..{passes} {{\n\ - unsafe {{\n\ - {loaded_args}\ - let __return_value = f({args});\n\ - println!(\"Result {{id}}-{{}}: {{:?}}\", i + 1, {return_value});\n\ - }}\n\ - }}\n\ - }}", - loaded_args = intrinsic.arguments.load_values_rust(indentation3), + concatln!( + " for (id, f) in specializations {{", + " for i in 0..{passes} {{", + " unsafe {{", + "{loaded_args}", + " let __return_value = f({args});", + " println!(\"Result {{id}}-{{}}: {{:?}}\", i + 1, {return_value});", + " }}", + " }}", + " }}", + ), + loaded_args = intrinsic.arguments.load_values_rust(indentation.nest_by(4)), args = intrinsic.arguments.as_call_param_rust(), return_value = intrinsic.results.print_result_rust(), + passes = passes, ) } diff --git a/stdarch/crates/intrinsic-test/src/common/indentation.rs b/stdarch/crates/intrinsic-test/src/common/indentation.rs index 9ee331d7f7a3f..9c2cc886e6544 100644 --- a/stdarch/crates/intrinsic-test/src/common/indentation.rs +++ b/stdarch/crates/intrinsic-test/src/common/indentation.rs @@ -10,6 +10,10 @@ impl Indentation { pub fn nested(self) -> Self { Self(self.0 + 1) } + + pub fn nest_by(&self, additional_levels: u32) -> Self { + Self(self.0 + additional_levels) + } } impl std::fmt::Display for Indentation { From 8ba096143c984bb3a99264bd5f4f0b5726634826 Mon Sep 17 00:00:00 2001 From: sayantn Date: Fri, 29 Aug 2025 05:29:34 +0530 Subject: [PATCH 228/358] Make ADC/ADX intrinsics safe --- stdarch/crates/core_arch/src/x86/adx.rs | 131 ++++++++++----------- stdarch/crates/core_arch/src/x86_64/adx.rs | 120 +++++++++---------- 2 files changed, 120 insertions(+), 131 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/adx.rs b/stdarch/crates/core_arch/src/x86/adx.rs index 9ce65b76431a4..7d91697133111 100644 --- a/stdarch/crates/core_arch/src/x86/adx.rs +++ b/stdarch/crates/core_arch/src/x86/adx.rs @@ -17,8 +17,8 @@ unsafe extern "unadjusted" { #[inline] #[cfg_attr(test, assert_instr(adc))] #[stable(feature = "simd_x86_adx", since = "1.33.0")] -pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { - let (a, b) = llvm_addcarry_u32(c_in, a, b); +pub fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { + let (a, b) = unsafe { llvm_addcarry_u32(c_in, a, b) }; *out = b; a } @@ -32,7 +32,7 @@ pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { #[target_feature(enable = "adx")] #[cfg_attr(test, assert_instr(adc))] #[stable(feature = "simd_x86_adx", since = "1.33.0")] -pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { +pub fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { _addcarry_u32(c_in, a, b, out) } @@ -44,8 +44,8 @@ pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { #[inline] #[cfg_attr(test, assert_instr(sbb))] 
#[stable(feature = "simd_x86_adx", since = "1.33.0")] -pub unsafe fn _subborrow_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { - let (a, b) = llvm_subborrow_u32(c_in, a, b); +pub fn _subborrow_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { + let (a, b) = unsafe { llvm_subborrow_u32(c_in, a, b) }; *out = b; a } @@ -58,38 +58,36 @@ mod tests { #[test] fn test_addcarry_u32() { - unsafe { - let a = u32::MAX; - let mut out = 0; - - let r = _addcarry_u32(0, a, 1, &mut out); - assert_eq!(r, 1); - assert_eq!(out, 0); - - let r = _addcarry_u32(0, a, 0, &mut out); - assert_eq!(r, 0); - assert_eq!(out, a); - - let r = _addcarry_u32(1, a, 1, &mut out); - assert_eq!(r, 1); - assert_eq!(out, 1); - - let r = _addcarry_u32(1, a, 0, &mut out); - assert_eq!(r, 1); - assert_eq!(out, 0); - - let r = _addcarry_u32(0, 3, 4, &mut out); - assert_eq!(r, 0); - assert_eq!(out, 7); - - let r = _addcarry_u32(1, 3, 4, &mut out); - assert_eq!(r, 0); - assert_eq!(out, 8); - } + let a = u32::MAX; + let mut out = 0; + + let r = _addcarry_u32(0, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarry_u32(0, a, 0, &mut out); + assert_eq!(r, 0); + assert_eq!(out, a); + + let r = _addcarry_u32(1, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 1); + + let r = _addcarry_u32(1, a, 0, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarry_u32(0, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 7); + + let r = _addcarry_u32(1, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 8); } #[simd_test(enable = "adx")] - unsafe fn test_addcarryx_u32() { + fn test_addcarryx_u32() { let a = u32::MAX; let mut out = 0; @@ -119,44 +117,39 @@ mod tests { } #[simd_test(enable = "adx")] - unsafe fn test_addcarryx_u32_2() { - unsafe fn add_1_2_3() -> u32 { - let mut out = 0; - _addcarryx_u32(1, 2, 3, &mut out); - out - } - assert_eq!(6, add_1_2_3()); + fn test_addcarryx_u32_2() { + let mut out = 0; + _addcarryx_u32(1, 2, 3, &mut out); + assert_eq!(6, out); } #[test] fn test_subborrow_u32() { - unsafe { - let a = u32::MAX; - let mut out = 0; - - let r = _subborrow_u32(0, 0, 1, &mut out); - assert_eq!(r, 1); - assert_eq!(out, a); - - let r = _subborrow_u32(0, 0, 0, &mut out); - assert_eq!(r, 0); - assert_eq!(out, 0); - - let r = _subborrow_u32(1, 0, 1, &mut out); - assert_eq!(r, 1); - assert_eq!(out, a - 1); - - let r = _subborrow_u32(1, 0, 0, &mut out); - assert_eq!(r, 1); - assert_eq!(out, a); - - let r = _subborrow_u32(0, 7, 3, &mut out); - assert_eq!(r, 0); - assert_eq!(out, 4); - - let r = _subborrow_u32(1, 7, 3, &mut out); - assert_eq!(r, 0); - assert_eq!(out, 3); - } + let a = u32::MAX; + let mut out = 0; + + let r = _subborrow_u32(0, 0, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a); + + let r = _subborrow_u32(0, 0, 0, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 0); + + let r = _subborrow_u32(1, 0, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a - 1); + + let r = _subborrow_u32(1, 0, 0, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a); + + let r = _subborrow_u32(0, 7, 3, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 4); + + let r = _subborrow_u32(1, 7, 3, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 3); } } diff --git a/stdarch/crates/core_arch/src/x86_64/adx.rs b/stdarch/crates/core_arch/src/x86_64/adx.rs index cf378cc169c34..74a473e6390c8 100644 --- a/stdarch/crates/core_arch/src/x86_64/adx.rs +++ b/stdarch/crates/core_arch/src/x86_64/adx.rs @@ -17,8 +17,8 @@ unsafe extern "unadjusted" { #[inline] #[cfg_attr(test, assert_instr(adc))] 
#[stable(feature = "simd_x86_adx", since = "1.33.0")] -pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { - let (a, b) = llvm_addcarry_u64(c_in, a, b); +pub fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { + let (a, b) = unsafe { llvm_addcarry_u64(c_in, a, b) }; *out = b; a } @@ -32,7 +32,7 @@ pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { #[target_feature(enable = "adx")] #[cfg_attr(test, assert_instr(adc))] #[stable(feature = "simd_x86_adx", since = "1.33.0")] -pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { +pub fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { _addcarry_u64(c_in, a, b, out) } @@ -44,8 +44,8 @@ pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { #[inline] #[cfg_attr(test, assert_instr(sbb))] #[stable(feature = "simd_x86_adx", since = "1.33.0")] -pub unsafe fn _subborrow_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { - let (a, b) = llvm_subborrow_u64(c_in, a, b); +pub fn _subborrow_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { + let (a, b) = unsafe { llvm_subborrow_u64(c_in, a, b) }; *out = b; a } @@ -58,38 +58,36 @@ mod tests { #[test] fn test_addcarry_u64() { - unsafe { - let a = u64::MAX; - let mut out = 0; - - let r = _addcarry_u64(0, a, 1, &mut out); - assert_eq!(r, 1); - assert_eq!(out, 0); - - let r = _addcarry_u64(0, a, 0, &mut out); - assert_eq!(r, 0); - assert_eq!(out, a); - - let r = _addcarry_u64(1, a, 1, &mut out); - assert_eq!(r, 1); - assert_eq!(out, 1); - - let r = _addcarry_u64(1, a, 0, &mut out); - assert_eq!(r, 1); - assert_eq!(out, 0); - - let r = _addcarry_u64(0, 3, 4, &mut out); - assert_eq!(r, 0); - assert_eq!(out, 7); - - let r = _addcarry_u64(1, 3, 4, &mut out); - assert_eq!(r, 0); - assert_eq!(out, 8); - } + let a = u64::MAX; + let mut out = 0; + + let r = _addcarry_u64(0, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarry_u64(0, a, 0, &mut out); + assert_eq!(r, 0); + assert_eq!(out, a); + + let r = _addcarry_u64(1, a, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 1); + + let r = _addcarry_u64(1, a, 0, &mut out); + assert_eq!(r, 1); + assert_eq!(out, 0); + + let r = _addcarry_u64(0, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 7); + + let r = _addcarry_u64(1, 3, 4, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 8); } #[simd_test(enable = "adx")] - unsafe fn test_addcarryx_u64() { + fn test_addcarryx_u64() { let a = u64::MAX; let mut out = 0; @@ -120,33 +118,31 @@ mod tests { #[test] fn test_subborrow_u64() { - unsafe { - let a = u64::MAX; - let mut out = 0; - - let r = _subborrow_u64(0, 0, 1, &mut out); - assert_eq!(r, 1); - assert_eq!(out, a); - - let r = _subborrow_u64(0, 0, 0, &mut out); - assert_eq!(r, 0); - assert_eq!(out, 0); - - let r = _subborrow_u64(1, 0, 1, &mut out); - assert_eq!(r, 1); - assert_eq!(out, a - 1); - - let r = _subborrow_u64(1, 0, 0, &mut out); - assert_eq!(r, 1); - assert_eq!(out, a); - - let r = _subborrow_u64(0, 7, 3, &mut out); - assert_eq!(r, 0); - assert_eq!(out, 4); - - let r = _subborrow_u64(1, 7, 3, &mut out); - assert_eq!(r, 0); - assert_eq!(out, 3); - } + let a = u64::MAX; + let mut out = 0; + + let r = _subborrow_u64(0, 0, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a); + + let r = _subborrow_u64(0, 0, 0, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 0); + + let r = _subborrow_u64(1, 0, 1, &mut out); + assert_eq!(r, 1); + assert_eq!(out, a - 1); + + let r = _subborrow_u64(1, 0, 0, &mut out); + 
assert_eq!(r, 1); + assert_eq!(out, a); + + let r = _subborrow_u64(0, 7, 3, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 4); + + let r = _subborrow_u64(1, 7, 3, &mut out); + assert_eq!(r, 0); + assert_eq!(out, 3); } } From 050bfd6e20cfe2042410de4f0af569c8a83762d8 Mon Sep 17 00:00:00 2001 From: sayantn Date: Fri, 29 Aug 2025 05:29:53 +0530 Subject: [PATCH 229/358] Make `_mm512_reduce_mul_ph` safe (missed) --- stdarch/crates/core_arch/src/x86/avx512fp16.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdarch/crates/core_arch/src/x86/avx512fp16.rs b/stdarch/crates/core_arch/src/x86/avx512fp16.rs index 2f02b70fa86f9..293fda3064dcb 100644 --- a/stdarch/crates/core_arch/src/x86/avx512fp16.rs +++ b/stdarch/crates/core_arch/src/x86/avx512fp16.rs @@ -11202,7 +11202,7 @@ pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_reduce_mul_ph(a: __m512h) -> f16 { +pub fn _mm512_reduce_mul_ph(a: __m512h) -> f16 { unsafe { let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); let q = simd_shuffle!( From b4181ca4956829702506690f76e3237429717b0d Mon Sep 17 00:00:00 2001 From: sayantn Date: Fri, 29 Aug 2025 05:30:11 +0530 Subject: [PATCH 230/358] Make `_bswap{,64}` safe --- stdarch/crates/core_arch/src/x86/bswap.rs | 8 +++----- stdarch/crates/core_arch/src/x86_64/bswap.rs | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/bswap.rs b/stdarch/crates/core_arch/src/x86/bswap.rs index 0db9acbd0ddf8..ea07a7d6229af 100644 --- a/stdarch/crates/core_arch/src/x86/bswap.rs +++ b/stdarch/crates/core_arch/src/x86/bswap.rs @@ -10,7 +10,7 @@ use stdarch_test::assert_instr; #[inline] #[cfg_attr(test, assert_instr(bswap))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _bswap(x: i32) -> i32 { +pub fn _bswap(x: i32) -> i32 { x.swap_bytes() } @@ -20,9 +20,7 @@ mod tests { #[test] fn test_bswap() { - unsafe { - assert_eq!(_bswap(0x0EADBE0F), 0x0FBEAD0E); - assert_eq!(_bswap(0x00000000), 0x00000000); - } + assert_eq!(_bswap(0x0EADBE0F), 0x0FBEAD0E); + assert_eq!(_bswap(0x00000000), 0x00000000); } } diff --git a/stdarch/crates/core_arch/src/x86_64/bswap.rs b/stdarch/crates/core_arch/src/x86_64/bswap.rs index 62cd2948ce14d..4e2d8b96eadee 100644 --- a/stdarch/crates/core_arch/src/x86_64/bswap.rs +++ b/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -11,7 +11,7 @@ use stdarch_test::assert_instr; #[inline] #[cfg_attr(test, assert_instr(bswap))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _bswap64(x: i64) -> i64 { +pub fn _bswap64(x: i64) -> i64 { x.swap_bytes() } @@ -21,9 +21,7 @@ mod tests { #[test] fn test_bswap64() { - unsafe { - assert_eq!(_bswap64(0x0EADBEEFFADECA0E), 0x0ECADEFAEFBEAD0E); - assert_eq!(_bswap64(0x0000000000000000), 0x0000000000000000); - } + assert_eq!(_bswap64(0x0EADBEEFFADECA0E), 0x0ECADEFAEFBEAD0E); + assert_eq!(_bswap64(0x0000000000000000), 0x0000000000000000); } } From 27f925b6eba7a62f2635f40c0a74614dd34010a1 Mon Sep 17 00:00:00 2001 From: sayantn Date: Fri, 29 Aug 2025 05:30:24 +0530 Subject: [PATCH 231/358] Make RDRAND/RDSEED safe --- stdarch/crates/core_arch/src/x86/rdrand.rs | 16 ++++++++-------- stdarch/crates/core_arch/src/x86_64/rdrand.rs | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/rdrand.rs b/stdarch/crates/core_arch/src/x86/rdrand.rs index 
50097915213b9..7ed03c258327d 100644 --- a/stdarch/crates/core_arch/src/x86/rdrand.rs +++ b/stdarch/crates/core_arch/src/x86/rdrand.rs @@ -26,8 +26,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { - let (v, flag) = x86_rdrand16_step(); +pub fn _rdrand16_step(val: &mut u16) -> i32 { + let (v, flag) = unsafe { x86_rdrand16_step() }; *val = v; flag } @@ -40,8 +40,8 @@ pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 { - let (v, flag) = x86_rdrand32_step(); +pub fn _rdrand32_step(val: &mut u32) -> i32 { + let (v, flag) = unsafe { x86_rdrand32_step() }; *val = v; flag } @@ -54,8 +54,8 @@ pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 { #[target_feature(enable = "rdseed")] #[cfg_attr(test, assert_instr(rdseed))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 { - let (v, flag) = x86_rdseed16_step(); +pub fn _rdseed16_step(val: &mut u16) -> i32 { + let (v, flag) = unsafe { x86_rdseed16_step() }; *val = v; flag } @@ -68,8 +68,8 @@ pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 { #[target_feature(enable = "rdseed")] #[cfg_attr(test, assert_instr(rdseed))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _rdseed32_step(val: &mut u32) -> i32 { - let (v, flag) = x86_rdseed32_step(); +pub fn _rdseed32_step(val: &mut u32) -> i32 { + let (v, flag) = unsafe { x86_rdseed32_step() }; *val = v; flag } diff --git a/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/stdarch/crates/core_arch/src/x86_64/rdrand.rs index 42e907b4e478d..dd195143413ef 100644 --- a/stdarch/crates/core_arch/src/x86_64/rdrand.rs +++ b/stdarch/crates/core_arch/src/x86_64/rdrand.rs @@ -23,8 +23,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 { - let (v, flag) = x86_rdrand64_step(); +pub fn _rdrand64_step(val: &mut u64) -> i32 { + let (v, flag) = unsafe { x86_rdrand64_step() }; *val = v; flag } @@ -37,8 +37,8 @@ pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 { #[target_feature(enable = "rdseed")] #[cfg_attr(test, assert_instr(rdseed))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _rdseed64_step(val: &mut u64) -> i32 { - let (v, flag) = x86_rdseed64_step(); +pub fn _rdseed64_step(val: &mut u64) -> i32 { + let (v, flag) = unsafe { x86_rdseed64_step() }; *val = v; flag } From 6580b3bd5cd3feceed6e6ebd405d3544a934c473 Mon Sep 17 00:00:00 2001 From: sayantn Date: Fri, 29 Aug 2025 05:30:55 +0530 Subject: [PATCH 232/358] Make all TBM intrinsics safe --- stdarch/crates/core_arch/src/x86/tbm.rs | 20 ++++++++++---------- stdarch/crates/core_arch/src/x86_64/tbm.rs | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/tbm.rs b/stdarch/crates/core_arch/src/x86/tbm.rs index a245e693284fb..5a01752d8ac2e 100644 --- a/stdarch/crates/core_arch/src/x86/tbm.rs +++ b/stdarch/crates/core_arch/src/x86/tbm.rs @@ -30,7 +30,7 @@ unsafe extern "C" { #[cfg_attr(test, assert_instr(bextr, CONTROL = 0x0404))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86_updates", since = 
"1.82.0")] -pub unsafe fn _bextri_u32(a: u32) -> u32 { +pub fn _bextri_u32(a: u32) -> u32 { static_assert_uimm_bits!(CONTROL, 16); unsafe { bextri_u32(a, CONTROL) } } @@ -42,7 +42,7 @@ pub unsafe fn _bextri_u32(a: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcfill))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcfill_u32(x: u32) -> u32 { +pub fn _blcfill_u32(x: u32) -> u32 { x & (x.wrapping_add(1)) } @@ -53,7 +53,7 @@ pub unsafe fn _blcfill_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blci))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blci_u32(x: u32) -> u32 { +pub fn _blci_u32(x: u32) -> u32 { x | !x.wrapping_add(1) } @@ -64,7 +64,7 @@ pub unsafe fn _blci_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcic))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcic_u32(x: u32) -> u32 { +pub fn _blcic_u32(x: u32) -> u32 { !x & x.wrapping_add(1) } @@ -76,7 +76,7 @@ pub unsafe fn _blcic_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcmsk))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcmsk_u32(x: u32) -> u32 { +pub fn _blcmsk_u32(x: u32) -> u32 { x ^ x.wrapping_add(1) } @@ -87,7 +87,7 @@ pub unsafe fn _blcmsk_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcs))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcs_u32(x: u32) -> u32 { +pub fn _blcs_u32(x: u32) -> u32 { x | x.wrapping_add(1) } @@ -98,7 +98,7 @@ pub unsafe fn _blcs_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsfill))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsfill_u32(x: u32) -> u32 { +pub fn _blsfill_u32(x: u32) -> u32 { x | x.wrapping_sub(1) } @@ -109,7 +109,7 @@ pub unsafe fn _blsfill_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsic))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsic_u32(x: u32) -> u32 { +pub fn _blsic_u32(x: u32) -> u32 { !x | x.wrapping_sub(1) } @@ -121,7 +121,7 @@ pub unsafe fn _blsic_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(t1mskc))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _t1mskc_u32(x: u32) -> u32 { +pub fn _t1mskc_u32(x: u32) -> u32 { !x | x.wrapping_add(1) } @@ -133,7 +133,7 @@ pub unsafe fn _t1mskc_u32(x: u32) -> u32 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(tzmsk))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _tzmsk_u32(x: u32) -> u32 { +pub fn _tzmsk_u32(x: u32) -> u32 { !x & x.wrapping_sub(1) } diff --git a/stdarch/crates/core_arch/src/x86_64/tbm.rs b/stdarch/crates/core_arch/src/x86_64/tbm.rs index 002e0059160b7..f4bba709f6817 100644 --- a/stdarch/crates/core_arch/src/x86_64/tbm.rs +++ b/stdarch/crates/core_arch/src/x86_64/tbm.rs @@ -30,7 +30,7 @@ unsafe extern "C" { #[cfg_attr(test, assert_instr(bextr, CONTROL = 0x0404))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86_updates", since = "1.82.0")] -pub unsafe fn _bextri_u64(a: u64) -> u64 { +pub fn _bextri_u64(a: u64) -> u64 { static_assert_uimm_bits!(CONTROL, 16); unsafe { bextri_u64(a, CONTROL) } } @@ -42,7 +42,7 @@ pub unsafe fn _bextri_u64(a: u64) -> u64 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcfill))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe 
fn _blcfill_u64(x: u64) -> u64 { +pub fn _blcfill_u64(x: u64) -> u64 { x & x.wrapping_add(1) } @@ -53,7 +53,7 @@ pub unsafe fn _blcfill_u64(x: u64) -> u64 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blci))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blci_u64(x: u64) -> u64 { +pub fn _blci_u64(x: u64) -> u64 { x | !x.wrapping_add(1) } @@ -64,7 +64,7 @@ pub unsafe fn _blci_u64(x: u64) -> u64 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcic))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcic_u64(x: u64) -> u64 { +pub fn _blcic_u64(x: u64) -> u64 { !x & x.wrapping_add(1) } @@ -76,7 +76,7 @@ pub unsafe fn _blcic_u64(x: u64) -> u64 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcmsk))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcmsk_u64(x: u64) -> u64 { +pub fn _blcmsk_u64(x: u64) -> u64 { x ^ x.wrapping_add(1) } @@ -87,7 +87,7 @@ pub unsafe fn _blcmsk_u64(x: u64) -> u64 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcs))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blcs_u64(x: u64) -> u64 { +pub fn _blcs_u64(x: u64) -> u64 { x | x.wrapping_add(1) } @@ -98,7 +98,7 @@ pub unsafe fn _blcs_u64(x: u64) -> u64 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsfill))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsfill_u64(x: u64) -> u64 { +pub fn _blsfill_u64(x: u64) -> u64 { x | x.wrapping_sub(1) } @@ -109,7 +109,7 @@ pub unsafe fn _blsfill_u64(x: u64) -> u64 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsic))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsic_u64(x: u64) -> u64 { +pub fn _blsic_u64(x: u64) -> u64 { !x | x.wrapping_sub(1) } @@ -121,7 +121,7 @@ pub unsafe fn _blsic_u64(x: u64) -> u64 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(t1mskc))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _t1mskc_u64(x: u64) -> u64 { +pub fn _t1mskc_u64(x: u64) -> u64 { !x | x.wrapping_add(1) } @@ -133,7 +133,7 @@ pub unsafe fn _t1mskc_u64(x: u64) -> u64 { #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(tzmsk))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _tzmsk_u64(x: u64) -> u64 { +pub fn _tzmsk_u64(x: u64) -> u64 { !x & x.wrapping_sub(1) } From bd3f30c0d222088668b9bd65bbd4d1692e3d0eca Mon Sep 17 00:00:00 2001 From: sayantn Date: Sat, 30 Aug 2025 11:58:28 +0530 Subject: [PATCH 233/358] Make `_mm_prefetch` safe --- stdarch/crates/core_arch/src/x86/sse.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/stdarch/crates/core_arch/src/x86/sse.rs b/stdarch/crates/core_arch/src/x86/sse.rs index be5ce8191a5cf..9e34a95a5d20b 100644 --- a/stdarch/crates/core_arch/src/x86/sse.rs +++ b/stdarch/crates/core_arch/src/x86/sse.rs @@ -1887,6 +1887,8 @@ pub const _MM_HINT_ET1: i32 = 6; /// * Prefetching may also fail if there are not enough memory-subsystem /// resources (e.g., request buffers). /// +/// Note: this intrinsic is safe to use even though it takes a raw pointer argument. In general, this +/// cannot change the behavior of the program, including not trapping on invalid pointers. 
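+///
+/// For illustration, a prefetch can now be issued from safe code. This is a
+/// minimal sketch (the element type and the `T0` locality hint are arbitrary
+/// choices):
+///
+/// ```
+/// #[cfg(target_arch = "x86_64")]
+/// fn warm_cache(data: &[f32]) {
+///     use std::arch::x86_64::{_MM_HINT_T0, _mm_prefetch};
+///     // Purely a hint: the CPU may ignore it, and even an invalid pointer
+///     // would not fault.
+///     _mm_prefetch::<_MM_HINT_T0>(data.as_ptr() as *const i8);
+/// }
+/// ```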
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_prefetch)
#[inline]
@@ -1897,11 +1899,13 @@ pub const _MM_HINT_ET1: i32 = 6;
#[cfg_attr(test, assert_instr(prefetchnta, STRATEGY = _MM_HINT_NTA))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
+pub fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
    static_assert_uimm_bits!(STRATEGY, 3);
    // We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache).
    // `locality` and `rw` are based on our `STRATEGY`.
-    prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1);
+    unsafe {
+        prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1);
+    }
}

/// Returns vector of type __m128 with indeterminate elements.

From a647a6f77f1be4bf1cb3d437468bfc9694ac2c7f Mon Sep 17 00:00:00 2001
From: sayantn
Date: Sat, 30 Aug 2025 12:02:07 +0530
Subject: [PATCH 234/358] Make the fence intrinsics and `_mm_pause` safe

---
 stdarch/crates/core_arch/src/x86/sse.rs  |  4 ++--
 stdarch/crates/core_arch/src/x86/sse2.rs | 14 +++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/stdarch/crates/core_arch/src/x86/sse.rs b/stdarch/crates/core_arch/src/x86/sse.rs
index 9e34a95a5d20b..86f743e76d882 100644
--- a/stdarch/crates/core_arch/src/x86/sse.rs
+++ b/stdarch/crates/core_arch/src/x86/sse.rs
@@ -1445,8 +1445,8 @@ pub fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_sfence() {
-    sfence()
+pub fn _mm_sfence() {
+    unsafe { sfence() }
}

/// Gets the unsigned 32-bit value of the MXCSR control and status register.
diff --git a/stdarch/crates/core_arch/src/x86/sse2.rs b/stdarch/crates/core_arch/src/x86/sse2.rs
index 2bdadd0b4b277..11335856fb22c 100644
--- a/stdarch/crates/core_arch/src/x86/sse2.rs
+++ b/stdarch/crates/core_arch/src/x86/sse2.rs
@@ -19,10 +19,10 @@ use crate::{
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_pause() {
+pub fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
-    pause()
+    unsafe { pause() }
}

/// Invalidates and flushes the cache line that contains `p` from all levels of
@@ -49,8 +49,8 @@ pub unsafe fn _mm_clflush(p: *const u8) {
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_lfence() {
-    lfence()
+pub fn _mm_lfence() {
+    unsafe { lfence() }
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
@@ -65,8 +65,8 @@ pub unsafe fn _mm_lfence() {
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_mfence() {
-    mfence()
+pub fn _mm_mfence() {
+    unsafe { mfence() }
}

/// Adds packed 8-bit integers in `a` and `b`.
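With the fences callable from safe code, a store-then-publish sequence can keep
the fence outside any `unsafe` block. A minimal sketch (the buffer, the flag,
and the choice of `Release` ordering are illustrative assumptions):

```
#[cfg(target_arch = "x86_64")]
fn publish(data: &mut [u8], ready: &std::sync::atomic::AtomicBool) {
    use std::arch::x86_64::_mm_sfence;
    use std::sync::atomic::Ordering;

    data.fill(42);
    // Drain pending (e.g. non-temporal) stores before raising the flag.
    _mm_sfence();
    ready.store(true, Ordering::Release);
}
```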
@@ -3149,7 +3149,7 @@ mod tests { #[test] fn test_mm_pause() { - unsafe { _mm_pause() } + _mm_pause() } #[simd_test(enable = "sse2")] From 9f18885a663da658b6a9b078936a0cc3b1e52d47 Mon Sep 17 00:00:00 2001 From: ltdk Date: Tue, 14 Oct 2025 20:23:39 -0400 Subject: [PATCH 235/358] const select_unpredictable --- core/src/hint.rs | 7 ++++++- core/src/intrinsics/mod.rs | 8 ++++++-- core/src/lib.rs | 1 + coretests/tests/hint.rs | 34 +++++++++++++++++++++------------- coretests/tests/lib.rs | 1 + 5 files changed, 35 insertions(+), 16 deletions(-) diff --git a/core/src/hint.rs b/core/src/hint.rs index 23cfdf5bfde2b..6efe95a9edce9 100644 --- a/core/src/hint.rs +++ b/core/src/hint.rs @@ -4,6 +4,7 @@ //! //! Hints may be compile time or runtime. +use crate::marker::Destruct; use crate::mem::MaybeUninit; use crate::{intrinsics, ub_checks}; @@ -771,7 +772,11 @@ pub const fn cold_path() { /// ``` #[inline(always)] #[stable(feature = "select_unpredictable", since = "1.88.0")] -pub fn select_unpredictable(condition: bool, true_val: T, false_val: T) -> T { +#[rustc_const_unstable(feature = "const_select_unpredictable", issue = "145938")] +pub const fn select_unpredictable(condition: bool, true_val: T, false_val: T) -> T +where + T: [const] Destruct, +{ // FIXME(https://github.com/rust-lang/unsafe-code-guidelines/issues/245): // Change this to use ManuallyDrop instead. let mut true_val = MaybeUninit::new(true_val); diff --git a/core/src/intrinsics/mod.rs b/core/src/intrinsics/mod.rs index c2da8a1a92096..4cee77fda4fba 100644 --- a/core/src/intrinsics/mod.rs +++ b/core/src/intrinsics/mod.rs @@ -55,7 +55,7 @@ #![allow(missing_docs)] use crate::ffi::va_list::{VaArgSafe, VaListImpl}; -use crate::marker::{ConstParamTy, DiscriminantKind, PointeeSized, Tuple}; +use crate::marker::{ConstParamTy, Destruct, DiscriminantKind, PointeeSized, Tuple}; use crate::ptr; mod bounds; @@ -477,11 +477,15 @@ pub const fn unlikely(b: bool) -> bool { /// However unlike the public form, the intrinsic will not drop the value that /// is not selected. 
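+/// With the const gate enabled, the public wrapper is usable in const context.
+/// A minimal sketch (assumes a nightly toolchain with the feature gate
+/// enabled):
+///
+/// ```
+/// #![feature(const_select_unpredictable)]
+/// const MAX: u32 = core::hint::select_unpredictable(3 > 2, 3, 2);
+/// assert_eq!(MAX, 3);
+/// ```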
#[unstable(feature = "core_intrinsics", issue = "none")] +#[rustc_const_unstable(feature = "const_select_unpredictable", issue = "145938")] #[rustc_intrinsic] #[rustc_nounwind] #[miri::intrinsic_fallback_is_spec] #[inline] -pub fn select_unpredictable(b: bool, true_val: T, false_val: T) -> T { +pub const fn select_unpredictable(b: bool, true_val: T, false_val: T) -> T +where + T: [const] Destruct, +{ if b { true_val } else { false_val } } diff --git a/core/src/lib.rs b/core/src/lib.rs index bc428c37a88f9..e213e1d91a75d 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -106,6 +106,7 @@ #![feature(const_cmp)] #![feature(const_destruct)] #![feature(const_eval_select)] +#![feature(const_select_unpredictable)] #![feature(core_intrinsics)] #![feature(coverage_attribute)] #![feature(disjoint_bitor)] diff --git a/coretests/tests/hint.rs b/coretests/tests/hint.rs index 24de27b24b802..d15730823eb5b 100644 --- a/coretests/tests/hint.rs +++ b/coretests/tests/hint.rs @@ -1,25 +1,33 @@ #[test] fn select_unpredictable_drop() { use core::cell::Cell; + struct X<'a>(&'a Cell); - impl Drop for X<'_> { + impl const Drop for X<'_> { fn drop(&mut self) { self.0.set(true); } } - let a_dropped = Cell::new(false); - let b_dropped = Cell::new(false); - let a = X(&a_dropped); - let b = X(&b_dropped); - assert!(!a_dropped.get()); - assert!(!b_dropped.get()); - let selected = core::hint::select_unpredictable(core::hint::black_box(true), a, b); - assert!(!a_dropped.get()); - assert!(b_dropped.get()); - drop(selected); - assert!(a_dropped.get()); - assert!(b_dropped.get()); + const fn do_test() { + let a_dropped = Cell::new(false); + let b_dropped = Cell::new(false); + let a = X(&a_dropped); + let b = X(&b_dropped); + assert!(!a_dropped.get()); + assert!(!b_dropped.get()); + let selected = core::hint::select_unpredictable(core::hint::black_box(true), a, b); + assert!(!a_dropped.get()); + assert!(b_dropped.get()); + drop(selected); + assert!(a_dropped.get()); + assert!(b_dropped.get()); + } + + do_test(); + const { + do_test(); + } } #[test] diff --git a/coretests/tests/lib.rs b/coretests/tests/lib.rs index 5c2522acb1362..24e59d7cd73b6 100644 --- a/coretests/tests/lib.rs +++ b/coretests/tests/lib.rs @@ -27,6 +27,7 @@ #![feature(const_option_ops)] #![feature(const_ref_cell)] #![feature(const_result_trait_fn)] +#![feature(const_select_unpredictable)] #![feature(const_trait_impl)] #![feature(control_flow_ok)] #![feature(core_float_math)] From 456bad3ed5cfa21e05d90ecd57c3556b494a9817 Mon Sep 17 00:00:00 2001 From: Noa Date: Fri, 24 Oct 2025 18:18:04 -0500 Subject: [PATCH 236/358] Enable assert_instr for wasm32 throw --- stdarch/ci/docker/wasm32-wasip1/Dockerfile | 4 ++-- stdarch/crates/core_arch/src/wasm32/mod.rs | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/stdarch/ci/docker/wasm32-wasip1/Dockerfile b/stdarch/ci/docker/wasm32-wasip1/Dockerfile index f618b94291f5d..0527c0df1777a 100644 --- a/stdarch/ci/docker/wasm32-wasip1/Dockerfile +++ b/stdarch/ci/docker/wasm32-wasip1/Dockerfile @@ -7,9 +7,9 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends \ xz-utils \ clang -ENV VERSION=v34.0.1 +ENV VERSION=v38.0.3 RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/${VERSION}/wasmtime-${VERSION}-x86_64-linux.tar.xz | tar xJf - ENV PATH=$PATH:/wasmtime-${VERSION}-x86_64-linux -ENV CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime --dir /checkout/target/wasm32-wasip1/release/deps::." 
+ENV CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime -Wexceptions --dir /checkout/target/wasm32-wasip1/release/deps::."
diff --git a/stdarch/crates/core_arch/src/wasm32/mod.rs b/stdarch/crates/core_arch/src/wasm32/mod.rs
index 01bf0a71658b8..82674a0d0b7f2 100644
--- a/stdarch/crates/core_arch/src/wasm32/mod.rs
+++ b/stdarch/crates/core_arch/src/wasm32/mod.rs
@@ -185,10 +185,7 @@ unsafe extern "C-unwind" {
///
/// [`throw`]: https://webassembly.github.io/exception-handling/core/syntax/instructions.html#syntax-instr-control
/// [exception-handling proposal]: https://github.com/WebAssembly/exception-handling
-// FIXME: wasmtime does not currently support exception-handling, so cannot execute
-// a wasm module with the throw instruction in it. once it does, we can
-// reenable this attribute.
-// #[cfg_attr(test, assert_instr(throw, TAG = 0, ptr = core::ptr::null_mut()))]
+#[cfg_attr(test, assert_instr(throw, TAG = 0, ptr = core::ptr::null_mut()))]
#[inline]
#[unstable(feature = "wasm_exception_handling_intrinsics", issue = "122465")]
// FIXME: Since this instruction unwinds, `core` built with `-C panic=unwind`

From f2425321570abb4b1958dc96e6121eb9bd9d2179 Mon Sep 17 00:00:00 2001
From: Manuel Drehwald
Date: Tue, 28 Oct 2025 00:47:29 -0700
Subject: [PATCH 237/358] Start documenting autodiff activities

---
 core/src/macros/mod.rs | 47 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/core/src/macros/mod.rs b/core/src/macros/mod.rs
index df24dd43b82eb..7d7c4147983ce 100644
--- a/core/src/macros/mod.rs
+++ b/core/src/macros/mod.rs
@@ -1499,6 +1499,25 @@ pub(crate) mod builtin {
    /// - `INPUT_ACTIVITIES`: Specifies one valid activity for each input parameter.
    /// - `OUTPUT_ACTIVITY`: Must not be set if the function implicitly returns nothing
    /// (or explicitly returns `-> ()`). Otherwise, it must be set to one of the allowed activities.
+    ///
+    /// ACTIVITIES might either be `Dual` or `Const`; more options will be exposed later.
+    ///
+    /// `Const` should be used on non-float arguments, or float-based arguments as an optimization
+    /// if we are not interested in computing the derivatives with respect to this argument.
+    ///
+    /// `Dual` can be used for float scalar values or for references, raw pointers, or other
+    /// indirect input arguments. It can also be used on a scalar float return value.
+    /// If used on a return value, the generated function will return a tuple of two float scalars.
+    /// If used on an input argument, a new shadow argument of the same type will be created,
+    /// directly following the original argument.
+    ///
+    /// We might want to track how one input float affects one or more output floats. In this case,
+    /// the shadow of one input should be initialized to `1.0`, while the shadows of the other
+    /// inputs should be initialized to `0.0`. The shadow of the output(s) should be initialized to
+    /// `0.0`. After calling the generated function, the shadow of the input will be zeroed,
+    /// while the shadow(s) of the output(s) will contain the derivatives. Forward mode is generally
+    /// more efficient if we have more output floats marked as `Dual` than input floats.
+    /// Related information can also be found under the term "Jacobian-vector product" (JVP).
    #[unstable(feature = "autodiff", issue = "124509")]
    #[allow_internal_unstable(rustc_attrs)]
    #[allow_internal_unstable(core_intrinsics)]
@@ -1518,6 +1537,34 @@ pub(crate) mod builtin {
    /// - `INPUT_ACTIVITIES`: Specifies one valid activity for each input parameter.
    /// - `OUTPUT_ACTIVITY`: Must not be set if the function implicitly returns nothing
    /// (or explicitly returns `-> ()`). Otherwise, it must be set to one of the allowed activities.
+    ///
+    /// ACTIVITIES might either be `Active`, `Duplicated` or `Const`; more options will be exposed later.
+    ///
+    /// `Active` can be used for float scalar values.
+    /// If used on an input, a new float will be appended to the return tuple of the generated
+    /// function. If the function returns a float scalar, `Active` can be used for the return as
+    /// well. In this case a float scalar will be appended to the argument list; it works as a seed.
+    ///
+    /// `Duplicated` can be used on references, raw pointers, or other indirect input
+    /// arguments. It creates a new shadow argument of the same type, following the original argument.
+    /// A const reference or pointer argument will receive a mutable reference or pointer as shadow.
+    ///
+    /// `Const` should be used on non-float arguments, or float-based arguments as an optimization
+    /// if we are not interested in computing the derivatives with respect to this argument.
+    ///
+    /// We often want to track how one or more input floats affect one output float. This output can
+    /// be a scalar return value, or a mutable reference or pointer argument. In this case, the
+    /// shadow of the input should be marked as duplicated and initialized to `0.0`. The shadow of
+    /// the output should be marked as active or duplicated and initialized to `1.0`. After calling
+    /// the generated function, the shadow(s) of the input(s) will contain the derivatives. If the
+    /// function has more than one output float marked as active or duplicated, users might want to
+    /// set one of them to `1.0` and the others to `0.0` to compute partial derivatives.
+    /// Unlike forward mode, a call to the generated function does not reset the shadow of the
+    /// inputs.
+    /// Reverse mode is generally more efficient if we have more active/duplicated input floats
+    /// than output floats.
+    ///
+    /// Related information can also be found under the term "vector-Jacobian product" (VJP).
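+    ///
+    /// # Example
+    ///
+    /// A minimal reverse-mode sketch: one `Duplicated` input and an `Active`
+    /// scalar return, so the generated function takes a shadow argument plus a
+    /// seed and returns the primal value. The exact generated signature shown
+    /// below is an assumption and may change while the feature is unstable.
+    ///
+    /// ```ignore
+    /// #![feature(autodiff)]
+    /// use std::autodiff::autodiff_reverse;
+    ///
+    /// #[autodiff_reverse(d_square, Duplicated, Active)]
+    /// fn square(x: &f64) -> f64 {
+    ///     x * x
+    /// }
+    ///
+    /// let x = 3.0;
+    /// let mut dx = 0.0; // shadow of `x`, receives d(x * x)/dx
+    /// let out = d_square(&x, &mut dx, 1.0); // the trailing `1.0` is the seed
+    /// assert_eq!(out, 9.0);
+    /// assert_eq!(dx, 6.0);
+    /// ```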
#[unstable(feature = "autodiff", issue = "124509")] #[allow_internal_unstable(rustc_attrs)] #[allow_internal_unstable(core_intrinsics)] From e776261a5034542bc0cd33645e135a97dd9926e0 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 27 Oct 2025 10:27:05 -0700 Subject: [PATCH 238/358] Update CURRENT_RUSTC_VERSION post-bump (cherry picked from commit 813072186c1c305ea62c7270f1514dfab5166af2) --- alloc/src/boxed.rs | 4 ++-- alloc/src/collections/btree/map/entry.rs | 4 ++-- alloc/src/rc.rs | 4 ++-- alloc/src/sync.rs | 4 ++-- core/src/num/nonzero.rs | 7 ++----- core/src/panic/location.rs | 4 ++-- core/src/slice/mod.rs | 4 ++-- core/src/wtf8.rs | 2 +- proc_macro/src/lib.rs | 2 +- std/src/sync/poison/rwlock.rs | 2 +- 10 files changed, 17 insertions(+), 20 deletions(-) diff --git a/alloc/src/boxed.rs b/alloc/src/boxed.rs index ae43fbfe1d69e..7d8077f231dde 100644 --- a/alloc/src/boxed.rs +++ b/alloc/src/boxed.rs @@ -300,7 +300,7 @@ impl Box { /// [zeroed]: mem::MaybeUninit::zeroed #[cfg(not(no_global_oom_handling))] #[inline] - #[stable(feature = "new_zeroed_alloc", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "new_zeroed_alloc", since = "1.92.0")] #[must_use] pub fn new_zeroed() -> Box> { Self::new_zeroed_in(Global) @@ -692,7 +692,7 @@ impl Box<[T]> { /// /// [zeroed]: mem::MaybeUninit::zeroed #[cfg(not(no_global_oom_handling))] - #[stable(feature = "new_zeroed_alloc", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "new_zeroed_alloc", since = "1.92.0")] #[must_use] pub fn new_zeroed_slice(len: usize) -> Box<[mem::MaybeUninit]> { unsafe { RawVec::with_capacity_zeroed(len).into_box(len) } diff --git a/alloc/src/collections/btree/map/entry.rs b/alloc/src/collections/btree/map/entry.rs index df51be3de54b9..add8782a9499a 100644 --- a/alloc/src/collections/btree/map/entry.rs +++ b/alloc/src/collections/btree/map/entry.rs @@ -284,7 +284,7 @@ impl<'a, K: Ord, V, A: Allocator + Clone> Entry<'a, K, V, A> { /// assert_eq!(entry.key(), &"poneyland"); /// ``` #[inline] - #[stable(feature = "btree_entry_insert", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "btree_entry_insert", since = "1.92.0")] pub fn insert_entry(self, value: V) -> OccupiedEntry<'a, K, V, A> { match self { Occupied(mut entry) => { @@ -394,7 +394,7 @@ impl<'a, K: Ord, V, A: Allocator + Clone> VacantEntry<'a, K, V, A> { /// } /// assert_eq!(map["poneyland"], 37); /// ``` - #[stable(feature = "btree_entry_insert", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "btree_entry_insert", since = "1.92.0")] pub fn insert_entry(mut self, value: V) -> OccupiedEntry<'a, K, V, A> { let handle = match self.handle { None => { diff --git a/alloc/src/rc.rs b/alloc/src/rc.rs index 2b62b92d43886..0baae0b314eb2 100644 --- a/alloc/src/rc.rs +++ b/alloc/src/rc.rs @@ -529,7 +529,7 @@ impl Rc { /// /// [zeroed]: mem::MaybeUninit::zeroed #[cfg(not(no_global_oom_handling))] - #[stable(feature = "new_zeroed_alloc", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "new_zeroed_alloc", since = "1.92.0")] #[must_use] pub fn new_zeroed() -> Rc> { unsafe { @@ -1057,7 +1057,7 @@ impl Rc<[T]> { /// /// [zeroed]: mem::MaybeUninit::zeroed #[cfg(not(no_global_oom_handling))] - #[stable(feature = "new_zeroed_alloc", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "new_zeroed_alloc", since = "1.92.0")] #[must_use] pub fn new_zeroed_slice(len: usize) -> Rc<[mem::MaybeUninit]> { unsafe { diff --git a/alloc/src/sync.rs b/alloc/src/sync.rs index c78f2c8a47e00..c6b85ca5b30b3 100644 --- a/alloc/src/sync.rs +++ b/alloc/src/sync.rs @@ 
-536,7 +536,7 @@ impl Arc { /// [zeroed]: mem::MaybeUninit::zeroed #[cfg(not(no_global_oom_handling))] #[inline] - #[stable(feature = "new_zeroed_alloc", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "new_zeroed_alloc", since = "1.92.0")] #[must_use] pub fn new_zeroed() -> Arc> { unsafe { @@ -1205,7 +1205,7 @@ impl Arc<[T]> { /// [zeroed]: mem::MaybeUninit::zeroed #[cfg(not(no_global_oom_handling))] #[inline] - #[stable(feature = "new_zeroed_alloc", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "new_zeroed_alloc", since = "1.92.0")] #[must_use] pub fn new_zeroed_slice(len: usize) -> Arc<[mem::MaybeUninit]> { unsafe { diff --git a/core/src/num/nonzero.rs b/core/src/num/nonzero.rs index fcdb65bd45c95..efb0665b7f461 100644 --- a/core/src/num/nonzero.rs +++ b/core/src/num/nonzero.rs @@ -1382,11 +1382,8 @@ macro_rules! nonzero_integer_signedness_dependent_impls { #[doc = concat!("let three = NonZero::new(3", stringify!($Int), ").unwrap();")] /// assert_eq!(three.div_ceil(two), two); /// ``` - #[stable(feature = "unsigned_nonzero_div_ceil", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable( - feature = "unsigned_nonzero_div_ceil", - since = "CURRENT_RUSTC_VERSION" - )] + #[stable(feature = "unsigned_nonzero_div_ceil", since = "1.92.0")] + #[rustc_const_stable(feature = "unsigned_nonzero_div_ceil", since = "1.92.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] diff --git a/core/src/panic/location.rs b/core/src/panic/location.rs index 5935849344475..8176af03d13a5 100644 --- a/core/src/panic/location.rs +++ b/core/src/panic/location.rs @@ -194,8 +194,8 @@ impl<'a> Location<'a> { /// `std::source_location::file_name`, both of which return a nul-terminated `const char*`. #[must_use] #[inline] - #[stable(feature = "file_with_nul", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "file_with_nul", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "file_with_nul", since = "1.92.0")] + #[rustc_const_stable(feature = "file_with_nul", since = "1.92.0")] pub const fn file_as_c_str(&self) -> &'a CStr { let filename = self.filename.as_ptr(); diff --git a/core/src/slice/mod.rs b/core/src/slice/mod.rs index 61eb78294f68b..0248688733952 100644 --- a/core/src/slice/mod.rs +++ b/core/src/slice/mod.rs @@ -3629,7 +3629,7 @@ impl [T] { /// assert_eq!(a, ['a', 'c', 'd', 'e', 'b', 'f']); /// ``` #[stable(feature = "slice_rotate", since = "1.26.0")] - #[rustc_const_stable(feature = "const_slice_rotate", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "const_slice_rotate", since = "1.92.0")] pub const fn rotate_left(&mut self, mid: usize) { assert!(mid <= self.len()); let k = self.len() - mid; @@ -3675,7 +3675,7 @@ impl [T] { /// assert_eq!(a, ['a', 'e', 'b', 'c', 'd', 'f']); /// ``` #[stable(feature = "slice_rotate", since = "1.26.0")] - #[rustc_const_stable(feature = "const_slice_rotate", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "const_slice_rotate", since = "1.92.0")] pub const fn rotate_right(&mut self, k: usize) { assert!(k <= self.len()); let mid = self.len() - k; diff --git a/core/src/wtf8.rs b/core/src/wtf8.rs index 0c03496c5e367..11cd2b8776f22 100644 --- a/core/src/wtf8.rs +++ b/core/src/wtf8.rs @@ -565,7 +565,7 @@ impl Iterator for EncodeWide<'_> { #[stable(feature = "encode_wide_fused_iterator", since = "1.62.0")] impl FusedIterator for EncodeWide<'_> {} -#[stable(feature = "encode_wide_debug", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = 
"encode_wide_debug", since = "1.92.0")] impl fmt::Debug for EncodeWide<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { struct CodeUnit(u16); diff --git a/proc_macro/src/lib.rs b/proc_macro/src/lib.rs index 1aa6064633c3b..4efdfcad924b5 100644 --- a/proc_macro/src/lib.rs +++ b/proc_macro/src/lib.rs @@ -379,7 +379,7 @@ impl Extend for TokenStream { macro_rules! extend_items { ($($item:ident)*) => { $( - #[stable(feature = "token_stream_extend_tt_items", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "token_stream_extend_tt_items", since = "1.92.0")] impl Extend<$item> for TokenStream { fn extend>(&mut self, iter: T) { self.extend(iter.into_iter().map(TokenTree::$item)); diff --git a/std/src/sync/poison/rwlock.rs b/std/src/sync/poison/rwlock.rs index fe51d8975e423..10e45bc8c11a3 100644 --- a/std/src/sync/poison/rwlock.rs +++ b/std/src/sync/poison/rwlock.rs @@ -859,7 +859,7 @@ impl<'rwlock, T: ?Sized> RwLockWriteGuard<'rwlock, T> { /// # let final_check = rw.read().unwrap(); /// # assert_eq!(*final_check, 3); /// ``` - #[stable(feature = "rwlock_downgrade", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "rwlock_downgrade", since = "1.92.0")] pub fn downgrade(s: Self) -> RwLockReadGuard<'rwlock, T> { let lock = s.lock; From f8c098e24849eef92bcb817a6e95ba5966e91ba4 Mon Sep 17 00:00:00 2001 From: nxsaken Date: Thu, 30 Oct 2025 13:54:46 +0400 Subject: [PATCH 239/358] Constify `ControlFlow` methods (no unstable features) --- core/src/ops/control_flow.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/ops/control_flow.rs b/core/src/ops/control_flow.rs index b760a7c4e21eb..e426ff7cc057a 100644 --- a/core/src/ops/control_flow.rs +++ b/core/src/ops/control_flow.rs @@ -150,7 +150,8 @@ impl ControlFlow { /// ``` #[inline] #[stable(feature = "control_flow_enum_is", since = "1.59.0")] - pub fn is_break(&self) -> bool { + #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + pub const fn is_break(&self) -> bool { matches!(*self, ControlFlow::Break(_)) } @@ -166,7 +167,8 @@ impl ControlFlow { /// ``` #[inline] #[stable(feature = "control_flow_enum_is", since = "1.59.0")] - pub fn is_continue(&self) -> bool { + #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + pub const fn is_continue(&self) -> bool { matches!(*self, ControlFlow::Continue(_)) } @@ -257,7 +259,8 @@ impl ControlFlow { /// ``` #[inline] #[unstable(feature = "control_flow_ok", issue = "140266")] - pub fn break_ok(self) -> Result { + #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + pub const fn break_ok(self) -> Result { match self { ControlFlow::Continue(c) => Err(c), ControlFlow::Break(b) => Ok(b), @@ -361,7 +364,8 @@ impl ControlFlow { /// ``` #[inline] #[unstable(feature = "control_flow_ok", issue = "140266")] - pub fn continue_ok(self) -> Result { + #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + pub const fn continue_ok(self) -> Result { match self { ControlFlow::Continue(c) => Ok(c), ControlFlow::Break(b) => Err(b), From afa6a6f669a55f82536709bdef48ddcc9f7b8b27 Mon Sep 17 00:00:00 2001 From: Alisa Sireneva Date: Thu, 30 Oct 2025 12:22:19 +0300 Subject: [PATCH 240/358] Move wasm `throw` intrinsic back to `unwind` rustc assumes that regular `extern "Rust"` functions unwind only if the `unwind` panic runtime is linked. `throw` was annotated as such, but unwound unconditionally. 
This could cause UB when a crate built with `-C panic=abort` called `throw` from `core` built with `-C panic=unwind`, since no terminator was added to handle the panic arising from calling an allegedly non-unwinding `extern "Rust"` function. rustc was taught to recognize this condition since https://github.com/rust-lang/rust/pull/144225 and prevented such linkage, but this caused regressions in https://github.com/rust-lang/rust/issues/148246, since this meant that Emscripten projects could not be built with `-C panic=abort` without recompiling std. The most straightforward solution would be to move `throw` into the `panic_unwind` crate, so that it's only compiled if the panic runtime is guaranteed to be `unwind`, but this is messy due to our architecture. Instead, move it into `unwind::wasm`, which is only compiled for bare-metal targets that default to `panic = "abort"`, rendering the issue moot. --- stdarch/crates/core_arch/src/wasm32/mod.rs | 32 ----------------- unwind/src/lib.rs | 2 +- unwind/src/wasm.rs | 40 ++++++++++++++++------ 3 files changed, 30 insertions(+), 44 deletions(-) diff --git a/stdarch/crates/core_arch/src/wasm32/mod.rs b/stdarch/crates/core_arch/src/wasm32/mod.rs index 01bf0a71658b8..57c9157bede89 100644 --- a/stdarch/crates/core_arch/src/wasm32/mod.rs +++ b/stdarch/crates/core_arch/src/wasm32/mod.rs @@ -173,35 +173,3 @@ pub fn f64_nearest(a: f64) -> f64 { pub fn f64_sqrt(a: f64) -> f64 { crate::intrinsics::sqrtf64(a) } - -unsafe extern "C-unwind" { - #[link_name = "llvm.wasm.throw"] - fn wasm_throw(tag: i32, ptr: *mut u8) -> !; -} - -/// Generates the [`throw`] instruction from the [exception-handling proposal] for WASM. -/// -/// This function is unlikely to be stabilized until codegen backends have better support. -/// -/// [`throw`]: https://webassembly.github.io/exception-handling/core/syntax/instructions.html#syntax-instr-control -/// [exception-handling proposal]: https://github.com/WebAssembly/exception-handling -// FIXME: wasmtime does not currently support exception-handling, so cannot execute -// a wasm module with the throw instruction in it. once it does, we can -// reenable this attribute. -// #[cfg_attr(test, assert_instr(throw, TAG = 0, ptr = core::ptr::null_mut()))] -#[inline] -#[unstable(feature = "wasm_exception_handling_intrinsics", issue = "122465")] -// FIXME: Since this instruction unwinds, `core` built with `-C panic=unwind` -// cannot be linked with `-C panic=abort` programs. But that's not -// entirely supported anyway, because runtimes without EH support won't -// be able to handle `try` blocks in `-C panic=unwind` crates either. -// We ship `-C panic=abort` `core`, so this doesn't affect users -// directly. Resolving this will likely require patching out both `try` -// and `throw` instructions, at which point we can look into whitelisting -// this function in the compiler to allow linking. -// See https://github.com/rust-lang/rust/issues/118168. -#[allow(ffi_unwind_calls)] -pub unsafe fn throw(ptr: *mut u8) -> ! { - static_assert!(TAG == 0); // LLVM only supports tag 0 == C++ right now. 
- wasm_throw(TAG, ptr) -} diff --git a/unwind/src/lib.rs b/unwind/src/lib.rs index cd3a2f33ffa56..e3a0a77f53f08 100644 --- a/unwind/src/lib.rs +++ b/unwind/src/lib.rs @@ -7,7 +7,7 @@ #![cfg_attr(not(target_env = "msvc"), feature(libc))] #![cfg_attr( all(target_family = "wasm", any(not(target_os = "emscripten"), emscripten_wasm_eh)), - feature(simd_wasm64, wasm_exception_handling_intrinsics) + feature(link_llvm_intrinsics, simd_wasm64) )] #![allow(internal_features)] #![deny(unsafe_op_in_unsafe_fn)] diff --git a/unwind/src/wasm.rs b/unwind/src/wasm.rs index 3341e54759a0d..2bff306af293f 100644 --- a/unwind/src/wasm.rs +++ b/unwind/src/wasm.rs @@ -40,22 +40,40 @@ pub unsafe fn _Unwind_DeleteException(exception: *mut _Unwind_Exception) { } pub unsafe fn _Unwind_RaiseException(exception: *mut _Unwind_Exception) -> _Unwind_Reason_Code { - // The wasm `throw` instruction takes a "tag", which differentiates certain - // types of exceptions from others. LLVM currently just identifies these - // via integers, with 0 corresponding to C++ exceptions and 1 to C setjmp()/longjmp(). - // Ideally, we'd be able to choose something unique for Rust, but for now, - // we pretend to be C++ and implement the Itanium exception-handling ABI. + // This implementation is only used for `wasm*-unknown-unknown` targets. Such targets are not + // guaranteed to support exceptions, and they default to `-C panic=abort`. Because an unknown + // instruction is a load-time error on wasm, instead of a runtime error like on traditional + // architectures, we never want to codegen a `throw` instruction unless the user explicitly + // enabled exceptions via `-Z build-std` with `-C panic=unwind`. cfg_select! { - // panic=abort is default for wasm targets. Because an unknown instruction is a load-time - // error on wasm, instead of a runtime error like on traditional architectures, we never - // want to codegen a `throw` instruction, as that would break users using runtimes that - // don't yet support exceptions. The only time this first branch would be selected is if - // the user explicitly opts in to wasm exceptions, via -Zbuild-std with -Cpanic=unwind. panic = "unwind" => { + // It's important that this intrinsic is defined here rather than in `core`. Since it + // unwinds, invoking it from Rust code compiled with `-C panic=unwind` immediately + // forces `panic_unwind` as the required panic runtime. + // + // We ship unwinding `core` on Emscripten, so making this intrinsic part of `core` would + // prevent linking precompiled `core` into `-C panic=abort` binaries. Unlike `core`, + // this particular module is never precompiled with `-C panic=unwind` because it's only + // used for bare-metal targets, so an error can only arise if the user both manually + // recompiles `std` with `-C panic=unwind` and manually compiles the binary crate with + // `-C panic=abort`, which we don't care to support. + // + // See https://github.com/rust-lang/rust/issues/148246. + unsafe extern "C-unwind" { + /// LLVM lowers this intrinsic to the `throw` instruction. + #[link_name = "llvm.wasm.throw"] + fn wasm_throw(tag: i32, ptr: *mut u8) -> !; + } + + // The wasm `throw` instruction takes a "tag", which differentiates certain types of + // exceptions from others. LLVM currently just identifies these via integers, with 0 + // corresponding to C++ exceptions and 1 to C setjmp()/longjmp(). Ideally, we'd be able + // to choose something unique for Rust, but for now, we pretend to be C++ and implement + // the Itanium exception-handling ABI. 
// corresponds with llvm::WebAssembly::Tag::CPP_EXCEPTION // in llvm-project/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h const CPP_EXCEPTION_TAG: i32 = 0; - core::arch::wasm::throw::(exception.cast()) + wasm_throw(CPP_EXCEPTION_TAG, exception.cast()) } _ => { let _ = exception; From 02fed3811650f6edbb773063ec2eaebe43070448 Mon Sep 17 00:00:00 2001 From: Shun Sakai Date: Fri, 31 Oct 2025 14:42:03 +0900 Subject: [PATCH 241/358] docs: Fix argument names for `carrying_mul_add` --- core/src/num/int_macros.rs | 2 +- core/src/num/uint_macros.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index c3460a6409069..509d7e335fb23 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -2705,7 +2705,7 @@ macro_rules! int_impl { Self::carrying_mul_add(self, rhs, carry, 0) } - /// Calculates the "full multiplication" `self * rhs + carry1 + carry2` + /// Calculates the "full multiplication" `self * rhs + carry + add` /// without the possibility to overflow. /// /// This returns the low-order (wrapping) bits and the high-order (overflow) bits diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index b5b768cf677aa..793d84d5139cc 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -3000,7 +3000,7 @@ macro_rules! uint_impl { Self::carrying_mul_add(self, rhs, carry, 0) } - /// Calculates the "full multiplication" `self * rhs + carry1 + carry2`. + /// Calculates the "full multiplication" `self * rhs + carry + add`. /// /// This returns the low-order (wrapping) bits and the high-order (overflow) bits /// of the result as two separate values, in that order. From 3402e408e6fc46791d61a1bbb4d4fd5bda587dd8 Mon Sep 17 00:00:00 2001 From: Emily Albini Date: Fri, 31 Oct 2025 10:07:23 +0100 Subject: [PATCH 242/358] enable flock for illumos --- std/src/fs/tests.rs | 6 ++++++ std/src/sys/fs/unix.rs | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/std/src/fs/tests.rs b/std/src/fs/tests.rs index 4d67ba9248998..0517760c35501 100644 --- a/std/src/fs/tests.rs +++ b/std/src/fs/tests.rs @@ -5,6 +5,7 @@ use rand::RngCore; target_os = "freebsd", target_os = "linux", target_os = "netbsd", + target_os = "illumos", target_vendor = "apple", ))] use crate::assert_matches::assert_matches; @@ -14,6 +15,7 @@ use crate::char::MAX_LEN_UTF8; target_os = "freebsd", target_os = "linux", target_os = "netbsd", + target_os = "illumos", target_vendor = "apple", ))] use crate::fs::TryLockError; @@ -227,6 +229,7 @@ fn file_test_io_seek_and_write() { target_os = "linux", target_os = "netbsd", target_os = "solaris", + target_os = "illumos", target_vendor = "apple", ))] fn file_lock_multiple_shared() { @@ -251,6 +254,7 @@ fn file_lock_multiple_shared() { target_os = "linux", target_os = "netbsd", target_os = "solaris", + target_os = "illumos", target_vendor = "apple", ))] fn file_lock_blocking() { @@ -276,6 +280,7 @@ fn file_lock_blocking() { target_os = "linux", target_os = "netbsd", target_os = "solaris", + target_os = "illumos", target_vendor = "apple", ))] fn file_lock_drop() { @@ -298,6 +303,7 @@ fn file_lock_drop() { target_os = "linux", target_os = "netbsd", target_os = "solaris", + target_os = "illumos", target_vendor = "apple", ))] fn file_lock_dup() { diff --git a/std/src/sys/fs/unix.rs b/std/src/sys/fs/unix.rs index d9a7fcb0e2d39..3efe67390d7dd 100644 --- a/std/src/sys/fs/unix.rs +++ b/std/src/sys/fs/unix.rs @@ -1292,6 +1292,7 @@ impl File { target_os = "netbsd", target_os = "openbsd", 
target_os = "cygwin", + target_os = "illumos", target_vendor = "apple", ))] pub fn lock(&self) -> io::Result<()> { @@ -1316,6 +1317,7 @@ impl File { target_os = "openbsd", target_os = "cygwin", target_os = "solaris", + target_os = "illumos", target_vendor = "apple", )))] pub fn lock(&self) -> io::Result<()> { @@ -1329,6 +1331,7 @@ impl File { target_os = "netbsd", target_os = "openbsd", target_os = "cygwin", + target_os = "illumos", target_vendor = "apple", ))] pub fn lock_shared(&self) -> io::Result<()> { @@ -1353,6 +1356,7 @@ impl File { target_os = "openbsd", target_os = "cygwin", target_os = "solaris", + target_os = "illumos", target_vendor = "apple", )))] pub fn lock_shared(&self) -> io::Result<()> { @@ -1366,6 +1370,7 @@ impl File { target_os = "netbsd", target_os = "openbsd", target_os = "cygwin", + target_os = "illumos", target_vendor = "apple", ))] pub fn try_lock(&self) -> Result<(), TryLockError> { @@ -1406,6 +1411,7 @@ impl File { target_os = "openbsd", target_os = "cygwin", target_os = "solaris", + target_os = "illumos", target_vendor = "apple", )))] pub fn try_lock(&self) -> Result<(), TryLockError> { @@ -1422,6 +1428,7 @@ impl File { target_os = "netbsd", target_os = "openbsd", target_os = "cygwin", + target_os = "illumos", target_vendor = "apple", ))] pub fn try_lock_shared(&self) -> Result<(), TryLockError> { @@ -1462,6 +1469,7 @@ impl File { target_os = "openbsd", target_os = "cygwin", target_os = "solaris", + target_os = "illumos", target_vendor = "apple", )))] pub fn try_lock_shared(&self) -> Result<(), TryLockError> { @@ -1478,6 +1486,7 @@ impl File { target_os = "netbsd", target_os = "openbsd", target_os = "cygwin", + target_os = "illumos", target_vendor = "apple", ))] pub fn unlock(&self) -> io::Result<()> { @@ -1502,6 +1511,7 @@ impl File { target_os = "openbsd", target_os = "cygwin", target_os = "solaris", + target_os = "illumos", target_vendor = "apple", )))] pub fn unlock(&self) -> io::Result<()> { From dbe9fa7ec7ca4707ccad26afe6ce9d4f5a185802 Mon Sep 17 00:00:00 2001 From: tison Date: Thu, 16 Oct 2025 23:05:12 +0800 Subject: [PATCH 243/358] Implement VecDeque::extract_if Signed-off-by: tison --- alloc/src/collections/vec_deque/extract_if.rs | 149 ++++++++++ alloc/src/collections/vec_deque/mod.rs | 94 ++++++ alloc/src/collections/vec_deque/tests.rs | 272 +++++++++++++++++- 3 files changed, 514 insertions(+), 1 deletion(-) create mode 100644 alloc/src/collections/vec_deque/extract_if.rs diff --git a/alloc/src/collections/vec_deque/extract_if.rs b/alloc/src/collections/vec_deque/extract_if.rs new file mode 100644 index 0000000000000..bed7d46482cf4 --- /dev/null +++ b/alloc/src/collections/vec_deque/extract_if.rs @@ -0,0 +1,149 @@ +use core::ops::{Range, RangeBounds}; +use core::{fmt, ptr, slice}; + +use super::VecDeque; +use crate::alloc::{Allocator, Global}; + +/// An iterator which uses a closure to determine if an element should be removed. +/// +/// This struct is created by [`VecDeque::extract_if`]. +/// See its documentation for more. 
+/// +/// # Example +/// +/// ``` +/// #![feature(vec_deque_extract_if)] +/// +/// use std::collections::vec_deque::ExtractIf; +/// use std::collections::vec_deque::VecDeque; +/// +/// let mut v = VecDeque::from([0, 1, 2]); +/// let iter: ExtractIf<'_, _, _> = v.extract_if(.., |x| *x % 2 == 0); +/// ``` +#[unstable(feature = "vec_deque_extract_if", issue = "147750")] +#[must_use = "iterators are lazy and do nothing unless consumed"] +pub struct ExtractIf< + 'a, + T, + F, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global, +> { + vec: &'a mut VecDeque, + /// The index of the item that will be inspected by the next call to `next`. + idx: usize, + /// Elements at and beyond this point will be retained. Must be equal or smaller than `old_len`. + end: usize, + /// The number of items that have been drained (removed) thus far. + del: usize, + /// The original length of `vec` prior to draining. + old_len: usize, + /// The filter test predicate. + pred: F, +} + +impl<'a, T, F, A: Allocator> ExtractIf<'a, T, F, A> { + pub(super) fn new>( + vec: &'a mut VecDeque, + pred: F, + range: R, + ) -> Self { + let old_len = vec.len(); + let Range { start, end } = slice::range(range, ..old_len); + + // Guard against the deque getting leaked (leak amplification) + vec.len = 0; + ExtractIf { vec, idx: start, del: 0, end, old_len, pred } + } + + /// Returns a reference to the underlying allocator. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(&self) -> &A { + self.vec.allocator() + } +} + +#[unstable(feature = "vec_deque_extract_if", issue = "147750")] +impl Iterator for ExtractIf<'_, T, F, A> +where + F: FnMut(&mut T) -> bool, +{ + type Item = T; + + fn next(&mut self) -> Option { + while self.idx < self.end { + let i = self.idx; + // SAFETY: + // We know that `i < self.end` from the if guard and that `self.end <= self.old_len` from + // the validity of `Self`. Therefore `i` points to an element within `vec`. + // + // Additionally, the i-th element is valid because each element is visited at most once + // and it is the first time we access vec[i]. + // + // Note: we can't use `vec.get_mut(i).unwrap()` here since the precondition for that + // function is that i < vec.len, but we've set vec's length to zero. + let idx = self.vec.to_physical_idx(i); + let cur = unsafe { &mut *self.vec.ptr().add(idx) }; + let drained = (self.pred)(cur); + // Update the index *after* the predicate is called. If the index + // is updated prior and the predicate panics, the element at this + // index would be leaked. + self.idx += 1; + if drained { + self.del += 1; + // SAFETY: We never touch this element again after returning it. + return Some(unsafe { ptr::read(cur) }); + } else if self.del > 0 { + let hole_slot = self.vec.to_physical_idx(i - self.del); + // SAFETY: `self.del` > 0, so the hole slot must not overlap with current element. + // We use copy for move, and never touch this element again. + unsafe { self.vec.wrap_copy(idx, hole_slot, 1) }; + } + } + None + } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(self.end - self.idx)) + } +} + +#[unstable(feature = "vec_deque_extract_if", issue = "147750")] +impl Drop for ExtractIf<'_, T, F, A> { + fn drop(&mut self) { + if self.del > 0 { + let src = self.vec.to_physical_idx(self.idx); + let dst = self.vec.to_physical_idx(self.idx - self.del); + let len = self.old_len - self.idx; + // SAFETY: Trailing unchecked items must be valid since we never touch them. 
+            unsafe { self.vec.wrap_copy(src, dst, len) };
+        }
+        self.vec.len = self.old_len - self.del;
+    }
+}
+
+#[unstable(feature = "vec_deque_extract_if", issue = "147750")]
+impl<T, F, A> fmt::Debug for ExtractIf<'_, T, F, A>
+where
+    T: fmt::Debug,
+    A: Allocator,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let peek = if self.idx < self.end {
+            let idx = self.vec.to_physical_idx(self.idx);
+            // This has to use pointer arithmetic as `self.vec[self.idx]` or
+            // `self.vec.get_unchecked(self.idx)` wouldn't work since we
+            // temporarily set the length of `self.vec` to zero.
+            //
+            // SAFETY:
+            // Since `self.idx` is smaller than `self.end` and `self.end` is
+            // no larger than `self.old_len`, `idx` is valid for indexing the
+            // buffer. Also, per the invariant of `self.idx`, this element
+            // has not been inspected/moved out yet.
+            Some(unsafe { &*self.vec.ptr().add(idx) })
+        } else {
+            None
+        };
+        f.debug_struct("ExtractIf").field("peek", &peek).finish_non_exhaustive()
+    }
+}
diff --git a/alloc/src/collections/vec_deque/mod.rs b/alloc/src/collections/vec_deque/mod.rs
index ac619a42d356d..3bdb7415f0c2a 100644
--- a/alloc/src/collections/vec_deque/mod.rs
+++ b/alloc/src/collections/vec_deque/mod.rs
@@ -32,6 +32,11 @@ pub use self::drain::Drain;
 
 mod drain;
 
+#[unstable(feature = "vec_deque_extract_if", issue = "147750")]
+pub use self::extract_if::ExtractIf;
+
+mod extract_if;
+
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use self::iter_mut::IterMut;
 
@@ -542,6 +547,95 @@ impl<T, A: Allocator> VecDeque<T, A> {
         }
         debug_assert!(self.head < self.capacity() || self.capacity() == 0);
     }
+
+    /// Creates an iterator which uses a closure to determine if an element
+    /// in the range should be removed.
+    ///
+    /// If the closure returns `true`, the element is removed from the deque and yielded. If the closure
+    /// returns `false`, or panics, the element remains in the deque and will not be yielded.
+    ///
+    /// Only elements that fall in the provided range are considered for extraction, but any elements
+    /// after the range will still have to be moved if any element has been extracted.
+    ///
+    /// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
+    /// or the iteration short-circuits, then the remaining elements will be retained.
+    /// Use [`retain_mut`] with a negated predicate if you do not need the returned iterator.
+    ///
+    /// [`retain_mut`]: VecDeque::retain_mut
+    ///
+    /// Using this method is equivalent to the following code:
+    ///
+    /// ```
+    /// #![feature(vec_deque_extract_if)]
+    /// # use std::collections::VecDeque;
+    /// # let some_predicate = |x: &mut i32| { *x % 2 == 1 };
+    /// # let mut deq: VecDeque<_> = (0..10).collect();
+    /// # let mut deq2 = deq.clone();
+    /// # let range = 1..5;
+    /// let mut i = range.start;
+    /// let end_items = deq.len() - range.end;
+    /// # let mut extracted = vec![];
+    ///
+    /// while i < deq.len() - end_items {
+    ///     if some_predicate(&mut deq[i]) {
+    ///         let val = deq.remove(i).unwrap();
+    ///         // your code here
+    /// #         extracted.push(val);
+    ///     } else {
+    ///         i += 1;
+    ///     }
+    /// }
+    ///
+    /// # let extracted2: Vec<_> = deq2.extract_if(range, some_predicate).collect();
+    /// # assert_eq!(deq, deq2);
+    /// # assert_eq!(extracted, extracted2);
+    /// ```
+    ///
+    /// But `extract_if` is easier to use. `extract_if` is also more efficient,
+    /// because it can backshift the elements of the deque in bulk.
+    ///
+    /// The iterator also lets you mutate the value of each element in the
+    /// closure, regardless of whether you choose to keep or remove it.
+    ///
+    /// # Panics
+    ///
+    /// If `range` is out of bounds.
+    ///
+    /// # Examples
+    ///
+    /// Splitting a deque into even and odd values, reusing the original deque:
+    ///
+    /// ```
+    /// #![feature(vec_deque_extract_if)]
+    /// use std::collections::VecDeque;
+    ///
+    /// let mut numbers = VecDeque::from([1, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15]);
+    ///
+    /// let evens = numbers.extract_if(.., |x| *x % 2 == 0).collect::<VecDeque<_>>();
+    /// let odds = numbers;
+    ///
+    /// assert_eq!(evens, VecDeque::from([2, 4, 6, 8, 14]));
+    /// assert_eq!(odds, VecDeque::from([1, 3, 5, 9, 11, 13, 15]));
+    /// ```
+    ///
+    /// Using the range argument to only process a part of the deque:
+    ///
+    /// ```
+    /// #![feature(vec_deque_extract_if)]
+    /// use std::collections::VecDeque;
+    ///
+    /// let mut items = VecDeque::from([0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 2, 1, 2]);
+    /// let ones = items.extract_if(7.., |x| *x == 1).collect::<VecDeque<_>>();
+    /// assert_eq!(items, VecDeque::from([0, 0, 0, 0, 0, 0, 0, 2, 2, 2]));
+    /// assert_eq!(ones.len(), 3);
+    /// ```
+    #[unstable(feature = "vec_deque_extract_if", issue = "147750")]
+    pub fn extract_if<F, R>(&mut self, range: R, filter: F) -> ExtractIf<'_, T, F, A>
+    where
+        F: FnMut(&mut T) -> bool,
+        R: RangeBounds<usize>,
+    {
+        ExtractIf::new(self, filter, range)
+    }
 }
 
 impl<T> VecDeque<T> {
diff --git a/alloc/src/collections/vec_deque/tests.rs b/alloc/src/collections/vec_deque/tests.rs
index 2501534e95080..dc50cc34d9dac 100644
--- a/alloc/src/collections/vec_deque/tests.rs
+++ b/alloc/src/collections/vec_deque/tests.rs
@@ -1,6 +1,8 @@
-use core::iter::TrustedLen;
+use std::iter::TrustedLen;
+use std::panic::{AssertUnwindSafe, catch_unwind};
 
 use super::*;
+use crate::testing::crash_test::{CrashTestDummy, Panic};
 use crate::testing::macros::struct_with_counted_drop;
 
 #[bench]
@@ -1161,3 +1163,271 @@ fn issue_80303() {
     assert_eq!(vda, vdb);
     assert_eq!(hash_code(vda), hash_code(vdb));
 }
+
+#[test]
+fn extract_if_test() {
+    let mut m: VecDeque<u32> = VecDeque::from([1, 2, 3, 4, 5, 6]);
+    let deleted = m.extract_if(.., |v| *v < 4).collect::<Vec<_>>();
+
+    assert_eq!(deleted, &[1, 2, 3]);
+    assert_eq!(m, &[4, 5, 6]);
+}
+
+#[test]
+fn drain_to_empty_test() {
+    let mut m: VecDeque<u32> = VecDeque::from([1, 2, 3, 4, 5, 6]);
+    let deleted = m.extract_if(.., |_| true).collect::<Vec<_>>();
+
+    assert_eq!(deleted, &[1, 2, 3, 4, 5, 6]);
+    assert_eq!(m, &[]);
+}
+
+#[test]
+fn extract_if_empty() {
+    let mut list: VecDeque<u32> = VecDeque::new();
+
+    {
+        let mut iter = list.extract_if(.., |_| true);
+        assert_eq!(iter.size_hint(), (0, Some(0)));
+        assert_eq!(iter.next(), None);
+        assert_eq!(iter.size_hint(), (0, Some(0)));
+        assert_eq!(iter.next(), None);
+        assert_eq!(iter.size_hint(), (0, Some(0)));
+    }
+
+    assert_eq!(list.len(), 0);
+    assert_eq!(list, vec![]);
+}
+
+#[test]
+fn extract_if_zst() {
+    let mut list: VecDeque<_> = [(), (), (), (), ()].into_iter().collect();
+    let initial_len = list.len();
+    let mut count = 0;
+
+    {
+        let mut iter = list.extract_if(.., |_| true);
+        assert_eq!(iter.size_hint(), (0, Some(initial_len)));
+        while let Some(_) = iter.next() {
+            count += 1;
+            assert_eq!(iter.size_hint(), (0, Some(initial_len - count)));
+        }
+        assert_eq!(iter.size_hint(), (0, Some(0)));
+        assert_eq!(iter.next(), None);
+        assert_eq!(iter.size_hint(), (0, Some(0)));
+    }
+
+    assert_eq!(count, initial_len);
+    assert_eq!(list.len(), 0);
+    assert_eq!(list, vec![]);
+}
+
+#[test]
+fn extract_if_false() {
+    let mut list: VecDeque<_> = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_iter().collect();
+
+    let initial_len = list.len();
+    let mut count = 0;
+
+    {
+        let mut iter = list.extract_if(.., |_| false);
+        assert_eq!(iter.size_hint(), (0, Some(initial_len)));
+        for _ in iter.by_ref() {
+            count += 1;
+        }
+        assert_eq!(iter.size_hint(), (0, Some(0)));
+        assert_eq!(iter.next(), None);
+        assert_eq!(iter.size_hint(), (0, Some(0)));
+    }
+
+    assert_eq!(count, 0);
+    assert_eq!(list.len(), initial_len);
+    assert_eq!(list, vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+}
+
+#[test]
+fn extract_if_true() {
+    let mut list: VecDeque<_> = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_iter().collect();
+
+    let initial_len = list.len();
+    let mut count = 0;
+
+    {
+        let mut iter = list.extract_if(.., |_| true);
+        assert_eq!(iter.size_hint(), (0, Some(initial_len)));
+        while let Some(_) = iter.next() {
+            count += 1;
+            assert_eq!(iter.size_hint(), (0, Some(initial_len - count)));
+        }
+        assert_eq!(iter.size_hint(), (0, Some(0)));
+        assert_eq!(iter.next(), None);
+        assert_eq!(iter.size_hint(), (0, Some(0)));
+    }
+
+    assert_eq!(count, initial_len);
+    assert_eq!(list.len(), 0);
+    assert_eq!(list, vec![]);
+}
+
+#[test]
+fn extract_if_non_contiguous() {
+    let mut list =
+        [1, 2, 4, 6, 7, 9, 11, 13, 15, 17, 18, 20, 22, 24, 26, 27, 29, 31, 33, 34, 35, 36, 37, 39]
+            .into_iter()
+            .collect::<VecDeque<_>>();
+    list.rotate_left(3);
+
+    assert!(!list.is_contiguous());
+    assert_eq!(
+        list,
+        [6, 7, 9, 11, 13, 15, 17, 18, 20, 22, 24, 26, 27, 29, 31, 33, 34, 35, 36, 37, 39, 1, 2, 4]
+    );
+
+    let removed = list.extract_if(.., |x| *x % 2 == 0).collect::<Vec<_>>();
+    assert_eq!(removed.len(), 10);
+    assert_eq!(removed, vec![6, 18, 20, 22, 24, 26, 34, 36, 2, 4]);
+
+    assert_eq!(list.len(), 14);
+    assert_eq!(list, vec![7, 9, 11, 13, 15, 17, 27, 29, 31, 33, 35, 37, 39, 1]);
+}
+
+#[test]
+fn extract_if_complex() {
+    {
+        // [+xxx++++++xxxxx++++x+x++]
+        let mut list = [
+            1, 2, 4, 6, 7, 9, 11, 13, 15, 17, 18, 20, 22, 24, 26, 27, 29, 31, 33, 34, 35, 36, 37,
+            39,
+        ]
+        .into_iter()
+        .collect::<VecDeque<_>>();
+
+        let removed = list.extract_if(.., |x| *x % 2 == 0).collect::<Vec<_>>();
+        assert_eq!(removed.len(), 10);
+        assert_eq!(removed, vec![2, 4, 6, 18, 20, 22, 24, 26, 34, 36]);
+
+        assert_eq!(list.len(), 14);
+        assert_eq!(list, vec![1, 7, 9, 11, 13, 15, 17, 27, 29, 31, 33, 35, 37, 39]);
+    }
+
+    {
+        // [xxx++++++xxxxx++++x+x++]
+        let mut list =
+            [2, 4, 6, 7, 9, 11, 13, 15, 17, 18, 20, 22, 24, 26, 27, 29, 31, 33, 34, 35, 36, 37, 39]
+                .into_iter()
+                .collect::<VecDeque<_>>();
+
+        let removed = list.extract_if(.., |x| *x % 2 == 0).collect::<Vec<_>>();
+        assert_eq!(removed.len(), 10);
+        assert_eq!(removed, vec![2, 4, 6, 18, 20, 22, 24, 26, 34, 36]);
+
+        assert_eq!(list.len(), 13);
+        assert_eq!(list, vec![7, 9, 11, 13, 15, 17, 27, 29, 31, 33, 35, 37, 39]);
+    }
+
+    {
+        // [xxx++++++xxxxx++++x+x]
+        let mut list =
+            [2, 4, 6, 7, 9, 11, 13, 15, 17, 18, 20, 22, 24, 26, 27, 29, 31, 33, 34, 35, 36]
+                .into_iter()
+                .collect::<VecDeque<_>>();
+
+        let removed = list.extract_if(.., |x| *x % 2 == 0).collect::<Vec<_>>();
+        assert_eq!(removed.len(), 10);
+        assert_eq!(removed, vec![2, 4, 6, 18, 20, 22, 24, 26, 34, 36]);
+
+        assert_eq!(list.len(), 11);
+        assert_eq!(list, vec![7, 9, 11, 13, 15, 17, 27, 29, 31, 33, 35]);
+    }
+
+    {
+        // [xxxxxxxxxx+++++++++++]
+        let mut list = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19]
+            .into_iter()
+            .collect::<VecDeque<_>>();
+
+        let removed = list.extract_if(.., |x| *x % 2 == 0).collect::<Vec<_>>();
+        assert_eq!(removed.len(), 10);
+        assert_eq!(removed, vec![2, 4, 6, 8, 10, 12, 14, 16, 18, 20]);
+
+        assert_eq!(list.len(), 10);
+        assert_eq!(list, vec![1, 3, 5, 7, 9, 11, 13, 15, 17, 19]);
+    }
+
+    {
+        // [+++++++++++xxxxxxxxxx]
+        let mut list = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
+            .into_iter()
+            .collect::<VecDeque<_>>();
+
+        let removed = list.extract_if(.., |x| *x % 2 == 0).collect::<Vec<_>>();
+        assert_eq!(removed.len(), 10);
+        assert_eq!(removed, vec![2, 4, 6, 8, 10, 12, 14, 16, 18, 20]);
+
+        assert_eq!(list.len(), 10);
+        assert_eq!(list, vec![1, 3, 5, 7, 9, 11, 13, 15, 17, 19]);
+    }
+}
+
+#[test]
+#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
+fn extract_if_drop_panic_leak() {
+    let d0 = CrashTestDummy::new(0);
+    let d1 = CrashTestDummy::new(1);
+    let d2 = CrashTestDummy::new(2);
+    let d3 = CrashTestDummy::new(3);
+    let d4 = CrashTestDummy::new(4);
+    let d5 = CrashTestDummy::new(5);
+    let d6 = CrashTestDummy::new(6);
+    let d7 = CrashTestDummy::new(7);
+    let mut q = VecDeque::new();
+    q.push_back(d3.spawn(Panic::Never));
+    q.push_back(d4.spawn(Panic::Never));
+    q.push_back(d5.spawn(Panic::Never));
+    q.push_back(d6.spawn(Panic::Never));
+    q.push_back(d7.spawn(Panic::Never));
+    q.push_front(d2.spawn(Panic::Never));
+    q.push_front(d1.spawn(Panic::InDrop));
+    q.push_front(d0.spawn(Panic::Never));
+
+    catch_unwind(AssertUnwindSafe(|| q.extract_if(.., |_| true).for_each(drop))).unwrap_err();
+
+    assert_eq!(d0.dropped(), 1);
+    assert_eq!(d1.dropped(), 1);
+    assert_eq!(d2.dropped(), 0);
+    assert_eq!(d3.dropped(), 0);
+    assert_eq!(d4.dropped(), 0);
+    assert_eq!(d5.dropped(), 0);
+    assert_eq!(d6.dropped(), 0);
+    assert_eq!(d7.dropped(), 0);
+    drop(q);
+    assert_eq!(d2.dropped(), 1);
+    assert_eq!(d3.dropped(), 1);
+    assert_eq!(d4.dropped(), 1);
+    assert_eq!(d5.dropped(), 1);
+    assert_eq!(d6.dropped(), 1);
+    assert_eq!(d7.dropped(), 1);
+}
+
+#[test]
+#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
+fn extract_if_pred_panic_leak() {
+    struct_with_counted_drop!(D(u32), DROPS);
+
+    let mut q = VecDeque::new();
+    q.push_back(D(3));
+    q.push_back(D(4));
+    q.push_back(D(5));
+    q.push_back(D(6));
+    q.push_back(D(7));
+    q.push_front(D(2));
+    q.push_front(D(1));
+    q.push_front(D(0));
+
+    _ = catch_unwind(AssertUnwindSafe(|| {
+        q.extract_if(.., |item| if item.0 >= 2 { panic!() } else { true }).for_each(drop)
+    }));
+
+    assert_eq!(DROPS.get(), 2); // 0 and 1
+    assert_eq!(q.len(), 6);
+}
From 758d9757ed5d83bdbc27564148d79f9a4556051e Mon Sep 17 00:00:00 2001
From: Karl Meakin
Date: Sat, 11 Oct 2025 01:31:25 +0100
Subject: [PATCH 244/358] refactor: move runtime functions to core

Instead of `include_str!()`ing `range_search.rs`, just make it a normal
module under `core::unicode`. This means the same source code doesn't
have to be checked in twice, and it plays nicer with IDEs.

Also rename it to `rt` since it includes functions for searching the
bitsets and case conversion tables as well as the range representation.
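
As an illustration of the representation this module works with, the
packed header can be exercised on its own. The following is a minimal
sketch of the same bit layout shown in the diff below (an 11-bit start
index stored above a 21-bit prefix sum); the standalone `Header` name
and the `main` harness are illustrative only, not part of the patch:

    // Mirrors the `ShortOffsetRunHeader` layout: the upper 11 bits hold
    // the start index, the lower 21 bits hold the prefix sum.
    struct Header(u32);

    impl Header {
        const fn new(start_index: usize, prefix_sum: u32) -> Self {
            // The start index must fit in 11 bits, the prefix sum in 21.
            assert!(start_index < (1 << 11));
            assert!(prefix_sum < (1 << 21));
            Self((start_index as u32) << 21 | prefix_sum)
        }

        const fn start_index(&self) -> usize {
            (self.0 >> 21) as usize
        }

        const fn prefix_sum(&self) -> u32 {
            self.0 & ((1 << 21) - 1)
        }
    }

    fn main() {
        // Round-trips one of the entries from the `alphabetic` table.
        let h = Header::new(12, 4681);
        assert_eq!(h.start_index(), 12);
        assert_eq!(h.prefix_sum(), 4681);
    }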
---
 core/src/unicode/mod.rs          |   1 +
 core/src/unicode/rt.rs           | 162 ++++++++++++++++++++++
 core/src/unicode/unicode_data.rs | 208 +++++++-------------------
 3 files changed, 211 insertions(+), 160 deletions(-)
 create mode 100644 core/src/unicode/rt.rs

diff --git a/core/src/unicode/mod.rs b/core/src/unicode/mod.rs
index c71fa754e68fb..9bc4136517fae 100644
--- a/core/src/unicode/mod.rs
+++ b/core/src/unicode/mod.rs
@@ -18,6 +18,7 @@ pub(crate) use unicode_data::white_space::lookup as White_Space;
 
 pub(crate) mod printable;
+mod rt;
 #[allow(unreachable_pub)]
 mod unicode_data;
 
diff --git a/core/src/unicode/rt.rs b/core/src/unicode/rt.rs
new file mode 100644
index 0000000000000..c438635cd794e
--- /dev/null
+++ b/core/src/unicode/rt.rs
@@ -0,0 +1,162 @@
+//! Runtime support for `unicode_data`.
+
+#[inline(always)]
+pub(super) const fn bitset_search<
+    const N: usize,
+    const CHUNK_SIZE: usize,
+    const N1: usize,
+    const CANONICAL: usize,
+    const CANONICALIZED: usize,
+>(
+    needle: u32,
+    chunk_idx_map: &[u8; N],
+    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
+    bitset_canonical: &[u64; CANONICAL],
+    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
+) -> bool {
+    let bucket_idx = (needle / 64) as usize;
+    let chunk_map_idx = bucket_idx / CHUNK_SIZE;
+    let chunk_piece = bucket_idx % CHUNK_SIZE;
+    // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const.
+    let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
+        chunk_idx_map[chunk_map_idx]
+    } else {
+        return false;
+    };
+    let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
+    // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const.
+    let word = if idx < bitset_canonical.len() {
+        bitset_canonical[idx]
+    } else {
+        let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
+        let mut word = bitset_canonical[real_idx as usize];
+        let should_invert = mapping & (1 << 6) != 0;
+        if should_invert {
+            word = !word;
+        }
+        // Lower 6 bits
+        let quantity = mapping & ((1 << 6) - 1);
+        if mapping & (1 << 7) != 0 {
+            // shift
+            word >>= quantity as u64;
+        } else {
+            word = word.rotate_left(quantity as u32);
+        }
+        word
+    };
+    (word & (1 << (needle % 64) as u64)) != 0
+}
+
+#[repr(transparent)]
+pub(super) struct ShortOffsetRunHeader(pub(super) u32);
+
+impl ShortOffsetRunHeader {
+    pub(super) const fn new(start_index: usize, prefix_sum: u32) -> Self {
+        assert!(start_index < (1 << 11));
+        assert!(prefix_sum < (1 << 21));
+
+        Self((start_index as u32) << 21 | prefix_sum)
+    }
+
+    #[inline]
+    pub(super) const fn start_index(&self) -> usize {
+        (self.0 >> 21) as usize
+    }
+
+    #[inline]
+    pub(super) const fn prefix_sum(&self) -> u32 {
+        self.0 & ((1 << 21) - 1)
+    }
+}
+
+/// # Safety
+///
+/// - The last element of `short_offset_runs` must be greater than `std::char::MAX`.
+/// - The start indices of all elements in `short_offset_runs` must be less than `OFFSETS`.
+#[inline(always)]
+pub(super) unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
+    needle: char,
+    short_offset_runs: &[ShortOffsetRunHeader; SOR],
+    offsets: &[u8; OFFSETS],
+) -> bool {
+    let needle = needle as u32;
+
+    let last_idx =
+        match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header.0 << 11) {
+            Ok(idx) => idx + 1,
+            Err(idx) => idx,
+        };
+    // SAFETY: `last_idx` *cannot* be past the end of the array, as the last
+    // element is greater than `std::char::MAX` (the largest possible needle)
+    // as guaranteed by the caller.
+    //
+    // So, we cannot have found it (i.e.
`Ok(idx) => idx + 1 != length`) and the + // correct location cannot be past it, so `Err(idx) => idx != length` either. + // + // This means that we can avoid bounds checking for the accesses below, too. + // + // We need to use `intrinsics::assume` since the `panic_nounwind` contained + // in `hint::assert_unchecked` may not be optimized out. + unsafe { crate::intrinsics::assume(last_idx < SOR) }; + + let mut offset_idx = short_offset_runs[last_idx].start_index(); + let length = if let Some(next) = short_offset_runs.get(last_idx + 1) { + (*next).start_index() - offset_idx + } else { + offsets.len() - offset_idx + }; + + let prev = + last_idx.checked_sub(1).map(|prev| short_offset_runs[prev].prefix_sum()).unwrap_or(0); + + let total = needle - prev; + let mut prefix_sum = 0; + for _ in 0..(length - 1) { + // SAFETY: It is guaranteed that `length <= OFFSETS - offset_idx`, + // so it follows that `length - 1 + offset_idx < OFFSETS`, therefore + // `offset_idx < OFFSETS` is always true in this loop. + // + // We need to use `intrinsics::assume` since the `panic_nounwind` contained + // in `hint::assert_unchecked` may not be optimized out. + unsafe { crate::intrinsics::assume(offset_idx < OFFSETS) }; + let offset = offsets[offset_idx]; + prefix_sum += offset as u32; + if prefix_sum > total { + break; + } + offset_idx += 1; + } + offset_idx % 2 == 1 +} + +/// # Safety +/// The second component of each tuple in `table` must either be: +/// - A valid `char` +/// - A value with the high bit (1 << 22) set, and the lower 22 bits +/// being a valid index into `multi`. +#[inline(always)] +pub(super) unsafe fn case_conversion( + c: char, + ascii_fn: fn(char) -> char, + table: &[(char, u32)], + multi: &[[char; 3]], +) -> [char; 3] { + const INDEX_MASK: u32 = 1 << 22; + + if c.is_ascii() { + return [ascii_fn(c), '\0', '\0']; + } + + let Ok(i) = table.binary_search_by(|&(key, _)| key.cmp(&c)) else { + return [c, '\0', '\0']; + }; + + let u = table[i].1; + match char::from_u32(u) { + Option::Some(c) => [c, '\0', '\0'], + Option::None => { + // SAFETY: Index comes from statically generated table + unsafe { *multi.get_unchecked((u & (INDEX_MASK - 1)) as usize) } + } + } +} diff --git a/core/src/unicode/unicode_data.rs b/core/src/unicode/unicode_data.rs index 3c38b44224f87..f9ab1585039e9 100644 --- a/core/src/unicode/unicode_data.rs +++ b/core/src/unicode/unicode_data.rs @@ -11,135 +11,7 @@ // to_upper : 13656 bytes // Total : 31911 bytes -#[inline(always)] -const fn bitset_search< - const N: usize, - const CHUNK_SIZE: usize, - const N1: usize, - const CANONICAL: usize, - const CANONICALIZED: usize, ->( - needle: u32, - chunk_idx_map: &[u8; N], - bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], - bitset_canonical: &[u64; CANONICAL], - bitset_canonicalized: &[(u8, u8); CANONICALIZED], -) -> bool { - let bucket_idx = (needle / 64) as usize; - let chunk_map_idx = bucket_idx / CHUNK_SIZE; - let chunk_piece = bucket_idx % CHUNK_SIZE; - // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const. - let chunk_idx = if chunk_map_idx < chunk_idx_map.len() { - chunk_idx_map[chunk_map_idx] - } else { - return false; - }; - let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize; - // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const. 
-    let word = if idx < bitset_canonical.len() {
-        bitset_canonical[idx]
-    } else {
-        let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
-        let mut word = bitset_canonical[real_idx as usize];
-        let should_invert = mapping & (1 << 6) != 0;
-        if should_invert {
-            word = !word;
-        }
-        // Lower 6 bits
-        let quantity = mapping & ((1 << 6) - 1);
-        if mapping & (1 << 7) != 0 {
-            // shift
-            word >>= quantity as u64;
-        } else {
-            word = word.rotate_left(quantity as u32);
-        }
-        word
-    };
-    (word & (1 << (needle % 64) as u64)) != 0
-}
-
-#[repr(transparent)]
-struct ShortOffsetRunHeader(u32);
-
-impl ShortOffsetRunHeader {
-    const fn new(start_index: usize, prefix_sum: u32) -> Self {
-        assert!(start_index < (1 << 11));
-        assert!(prefix_sum < (1 << 21));
-
-        Self((start_index as u32) << 21 | prefix_sum)
-    }
-
-    #[inline]
-    const fn start_index(&self) -> usize {
-        (self.0 >> 21) as usize
-    }
-
-    #[inline]
-    const fn prefix_sum(&self) -> u32 {
-        self.0 & ((1 << 21) - 1)
-    }
-}
-
-/// # Safety
-///
-/// - The last element of `short_offset_runs` must be greater than `std::char::MAX`.
-/// - The start indices of all elements in `short_offset_runs` must be less than `OFFSETS`.
-#[inline(always)]
-unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
-    needle: char,
-    short_offset_runs: &[ShortOffsetRunHeader; SOR],
-    offsets: &[u8; OFFSETS],
-) -> bool {
-    let needle = needle as u32;
-
-    let last_idx =
-        match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header.0 << 11) {
-            Ok(idx) => idx + 1,
-            Err(idx) => idx,
-        };
-    // SAFETY: `last_idx` *cannot* be past the end of the array, as the last
-    // element is greater than `std::char::MAX` (the largest possible needle)
-    // as guaranteed by the caller.
-    //
-    // So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
-    // correct location cannot be past it, so `Err(idx) => idx != length` either.
-    //
-    // This means that we can avoid bounds checking for the accesses below, too.
-    //
-    // We need to use `intrinsics::assume` since the `panic_nounwind` contained
-    // in `hint::assert_unchecked` may not be optimized out.
-    unsafe { crate::intrinsics::assume(last_idx < SOR) };
-
-    let mut offset_idx = short_offset_runs[last_idx].start_index();
-    let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
-        (*next).start_index() - offset_idx
-    } else {
-        offsets.len() - offset_idx
-    };
-
-    let prev =
-        last_idx.checked_sub(1).map(|prev| short_offset_runs[prev].prefix_sum()).unwrap_or(0);
-
-    let total = needle - prev;
-    let mut prefix_sum = 0;
-    for _ in 0..(length - 1) {
-        // SAFETY: It is guaranteed that `length <= OFFSETS - offset_idx`,
-        // so it follows that `length - 1 + offset_idx < OFFSETS`, therefore
-        // `offset_idx < OFFSETS` is always true in this loop.
-        //
-        // We need to use `intrinsics::assume` since the `panic_nounwind` contained
-        // in `hint::assert_unchecked` may not be optimized out.
-        unsafe { crate::intrinsics::assume(offset_idx < OFFSETS) };
-        let offset = offsets[offset_idx];
-        prefix_sum += offset as u32;
-        if prefix_sum > total {
-            break;
-        }
-        offset_idx += 1;
-    }
-    offset_idx % 2 == 1
-}
-
+use super::rt::*;
 pub const UNICODE_VERSION: (u8, u8, u8) = (17, 0, 0);
 
 #[rustfmt::skip]
@@ -758,42 +630,32 @@ pub mod white_space {
 
 #[rustfmt::skip]
 pub mod conversions {
-    const INDEX_MASK: u32 = 0x400000;
-
+    #[inline]
     pub fn to_lower(c: char) -> [char; 3] {
-        if c.is_ascii() {
-            [(c as u8).to_ascii_lowercase() as char, '\0', '\0']
-        } else {
-            LOWERCASE_TABLE
-                .binary_search_by(|&(key, _)| key.cmp(&c))
-                .map(|i| {
-                    let u = LOWERCASE_TABLE[i].1;
-                    char::from_u32(u).map(|c| [c, '\0', '\0']).unwrap_or_else(|| {
-                        // SAFETY: Index comes from statically generated table
-                        unsafe { *LOWERCASE_TABLE_MULTI.get_unchecked((u & (INDEX_MASK - 1)) as usize) }
-                    })
-                })
-                .unwrap_or([c, '\0', '\0'])
+        const {
+            let mut i = 0;
+            while i < LOWERCASE_TABLE.len() {
+                let (_, val) = LOWERCASE_TABLE[i];
+                if val & (1 << 22) == 0 {
+                    assert!(char::from_u32(val).is_some());
+                } else {
+                    let index = val & ((1 << 22) - 1);
+                    assert!((index as usize) < LOWERCASE_TABLE_MULTI.len());
+                }
+                i += 1;
+            }
         }
-    }
 
-    pub fn to_upper(c: char) -> [char; 3] {
-        if c.is_ascii() {
-            [(c as u8).to_ascii_uppercase() as char, '\0', '\0']
-        } else {
-            UPPERCASE_TABLE
-                .binary_search_by(|&(key, _)| key.cmp(&c))
-                .map(|i| {
-                    let u = UPPERCASE_TABLE[i].1;
-                    char::from_u32(u).map(|c| [c, '\0', '\0']).unwrap_or_else(|| {
-                        // SAFETY: Index comes from statically generated table
-                        unsafe { *UPPERCASE_TABLE_MULTI.get_unchecked((u & (INDEX_MASK - 1)) as usize) }
-                    })
-                })
-                .unwrap_or([c, '\0', '\0'])
+        // SAFETY: Just checked that the tables are valid
+        unsafe {
+            super::case_conversion(
+                c,
+                |c| c.to_ascii_lowercase(),
+                LOWERCASE_TABLE,
+                LOWERCASE_TABLE_MULTI,
+            )
         }
     }
-
     static LOWERCASE_TABLE: &[(char, u32); 1462] = &[
         ('\u{c0}', 224), ('\u{c1}', 225), ('\u{c2}', 226), ('\u{c3}', 227), ('\u{c4}', 228),
         ('\u{c5}', 229), ('\u{c6}', 230), ('\u{c7}', 231), ('\u{c8}', 232), ('\u{c9}', 233),
@@ -1155,6 +1017,32 @@ pub mod conversions {
         ['i', '\u{307}', '\u{0}'],
     ];
 
+    #[inline]
+    pub fn to_upper(c: char) -> [char; 3] {
+        const {
+            let mut i = 0;
+            while i < UPPERCASE_TABLE.len() {
+                let (_, val) = UPPERCASE_TABLE[i];
+                if val & (1 << 22) == 0 {
+                    assert!(char::from_u32(val).is_some());
+                } else {
+                    let index = val & ((1 << 22) - 1);
+                    assert!((index as usize) < UPPERCASE_TABLE_MULTI.len());
+                }
+                i += 1;
+            }
+        }
+
+        // SAFETY: Just checked that the tables are valid
+        unsafe {
+            super::case_conversion(
+                c,
+                |c| c.to_ascii_uppercase(),
+                UPPERCASE_TABLE,
+                UPPERCASE_TABLE_MULTI,
+            )
+        }
+    }
     static UPPERCASE_TABLE: &[(char, u32); 1554] = &[
         ('\u{b5}', 924), ('\u{df}', 4194304), ('\u{e0}', 192), ('\u{e1}', 193), ('\u{e2}', 194),
         ('\u{e3}', 195), ('\u{e4}', 196), ('\u{e5}', 197), ('\u{e6}', 198), ('\u{e7}', 199),
From 339593eb1f1aa9b5320fbdfd058fa9f7389135c6 Mon Sep 17 00:00:00 2001
From: Karl Meakin
Date: Sat, 11 Oct 2025 03:41:46 +0100
Subject: [PATCH 245/358] refactor: format `unicode_data`

Remove `#[rustfmt::skip]` from all the generated modules in
`unicode_data.rs`. This means we won't have to worry so much about
getting indentation and formatting right when generating code.

For now, some tables that would be too large when formatted by
`rustfmt` remain exempted.
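
Note that `#[rustfmt::skip]` applies to a single item, so the exemption
stays local to each oversized table while everything around it is
formatted normally. A minimal sketch (the table contents here are made
up, for illustration only):

    // Only this one item keeps its hand-rolled layout;
    // rustfmt formats the rest of the module as usual.
    #[rustfmt::skip]
    static SMALL_TABLE: [u8; 8] = [
        0, 1, 2, 3,
        4, 5, 6, 7,
    ];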
--- core/src/unicode/unicode_data.rs | 669 ++++++++++++++++++------------- 1 file changed, 399 insertions(+), 270 deletions(-) diff --git a/core/src/unicode/unicode_data.rs b/core/src/unicode/unicode_data.rs index f9ab1585039e9..9d03529229bf1 100644 --- a/core/src/unicode/unicode_data.rs +++ b/core/src/unicode/unicode_data.rs @@ -14,36 +14,60 @@ use super::rt::*; pub const UNICODE_VERSION: (u8, u8, u8) = (17, 0, 0); -#[rustfmt::skip] pub mod alphabetic { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 51] = [ - ShortOffsetRunHeader::new(0, 706), ShortOffsetRunHeader::new(12, 4681), - ShortOffsetRunHeader::new(414, 5741), ShortOffsetRunHeader::new(452, 7958), - ShortOffsetRunHeader::new(552, 9398), ShortOffsetRunHeader::new(623, 11264), - ShortOffsetRunHeader::new(625, 12293), ShortOffsetRunHeader::new(663, 13312), - ShortOffsetRunHeader::new(687, 19904), ShortOffsetRunHeader::new(688, 42125), - ShortOffsetRunHeader::new(690, 42509), ShortOffsetRunHeader::new(694, 55204), - ShortOffsetRunHeader::new(778, 63744), ShortOffsetRunHeader::new(783, 64110), - ShortOffsetRunHeader::new(784, 64830), ShortOffsetRunHeader::new(806, 66176), - ShortOffsetRunHeader::new(847, 67383), ShortOffsetRunHeader::new(894, 73440), - ShortOffsetRunHeader::new(1217, 74650), ShortOffsetRunHeader::new(1228, 77712), - ShortOffsetRunHeader::new(1233, 78896), ShortOffsetRunHeader::new(1236, 82939), - ShortOffsetRunHeader::new(1240, 83527), ShortOffsetRunHeader::new(1242, 90368), - ShortOffsetRunHeader::new(1243, 92160), ShortOffsetRunHeader::new(1245, 92729), - ShortOffsetRunHeader::new(1246, 93504), ShortOffsetRunHeader::new(1261, 101590), - ShortOffsetRunHeader::new(1282, 110576), ShortOffsetRunHeader::new(1287, 110883), - ShortOffsetRunHeader::new(1294, 111356), ShortOffsetRunHeader::new(1304, 113664), - ShortOffsetRunHeader::new(1305, 119808), ShortOffsetRunHeader::new(1315, 120486), - ShortOffsetRunHeader::new(1352, 122624), ShortOffsetRunHeader::new(1375, 123536), - ShortOffsetRunHeader::new(1399, 124112), ShortOffsetRunHeader::new(1403, 126464), - ShortOffsetRunHeader::new(1431, 127280), ShortOffsetRunHeader::new(1497, 131072), - ShortOffsetRunHeader::new(1503, 173792), ShortOffsetRunHeader::new(1504, 178206), - ShortOffsetRunHeader::new(1506, 183982), ShortOffsetRunHeader::new(1508, 191457), - ShortOffsetRunHeader::new(1510, 192094), ShortOffsetRunHeader::new(1512, 194560), - ShortOffsetRunHeader::new(1513, 195102), ShortOffsetRunHeader::new(1514, 196608), - ShortOffsetRunHeader::new(1515, 201547), ShortOffsetRunHeader::new(1516, 210042), + ShortOffsetRunHeader::new(0, 706), + ShortOffsetRunHeader::new(12, 4681), + ShortOffsetRunHeader::new(414, 5741), + ShortOffsetRunHeader::new(452, 7958), + ShortOffsetRunHeader::new(552, 9398), + ShortOffsetRunHeader::new(623, 11264), + ShortOffsetRunHeader::new(625, 12293), + ShortOffsetRunHeader::new(663, 13312), + ShortOffsetRunHeader::new(687, 19904), + ShortOffsetRunHeader::new(688, 42125), + ShortOffsetRunHeader::new(690, 42509), + ShortOffsetRunHeader::new(694, 55204), + ShortOffsetRunHeader::new(778, 63744), + ShortOffsetRunHeader::new(783, 64110), + ShortOffsetRunHeader::new(784, 64830), + ShortOffsetRunHeader::new(806, 66176), + ShortOffsetRunHeader::new(847, 67383), + ShortOffsetRunHeader::new(894, 73440), + ShortOffsetRunHeader::new(1217, 74650), + ShortOffsetRunHeader::new(1228, 77712), + ShortOffsetRunHeader::new(1233, 78896), + ShortOffsetRunHeader::new(1236, 82939), + ShortOffsetRunHeader::new(1240, 83527), + 
ShortOffsetRunHeader::new(1242, 90368), + ShortOffsetRunHeader::new(1243, 92160), + ShortOffsetRunHeader::new(1245, 92729), + ShortOffsetRunHeader::new(1246, 93504), + ShortOffsetRunHeader::new(1261, 101590), + ShortOffsetRunHeader::new(1282, 110576), + ShortOffsetRunHeader::new(1287, 110883), + ShortOffsetRunHeader::new(1294, 111356), + ShortOffsetRunHeader::new(1304, 113664), + ShortOffsetRunHeader::new(1305, 119808), + ShortOffsetRunHeader::new(1315, 120486), + ShortOffsetRunHeader::new(1352, 122624), + ShortOffsetRunHeader::new(1375, 123536), + ShortOffsetRunHeader::new(1399, 124112), + ShortOffsetRunHeader::new(1403, 126464), + ShortOffsetRunHeader::new(1431, 127280), + ShortOffsetRunHeader::new(1497, 131072), + ShortOffsetRunHeader::new(1503, 173792), + ShortOffsetRunHeader::new(1504, 178206), + ShortOffsetRunHeader::new(1506, 183982), + ShortOffsetRunHeader::new(1508, 191457), + ShortOffsetRunHeader::new(1510, 192094), + ShortOffsetRunHeader::new(1512, 194560), + ShortOffsetRunHeader::new(1513, 195102), + ShortOffsetRunHeader::new(1514, 196608), + ShortOffsetRunHeader::new(1515, 201547), + ShortOffsetRunHeader::new(1516, 210042), ShortOffsetRunHeader::new(1518, 1324154), ]; static OFFSETS: [u8; 1519] = [ @@ -52,57 +76,58 @@ pub mod alphabetic { 1, 2, 1, 2, 1, 1, 8, 27, 4, 4, 29, 11, 5, 56, 1, 7, 14, 102, 1, 8, 4, 8, 4, 3, 10, 3, 2, 1, 16, 48, 13, 101, 24, 33, 9, 2, 4, 1, 5, 24, 2, 19, 19, 25, 7, 11, 5, 24, 1, 7, 7, 1, 8, 42, 10, 12, 3, 7, 6, 76, 1, 16, 1, 3, 4, 15, 13, 19, 1, 8, 2, 2, 2, 22, 1, 7, 1, 1, 3, 4, 3, 8, - 2, 2, 2, 2, 1, 1, 8, 1, 4, 2, 1, 5, 12, 2, 10, 1, 4, 3, 1, 6, 4, 2, 2, 22, 1, 7, 1, 2, 1, 2, - 1, 2, 4, 5, 4, 2, 2, 2, 4, 1, 7, 4, 1, 1, 17, 6, 11, 3, 1, 9, 1, 3, 1, 22, 1, 7, 1, 2, 1, 5, - 3, 9, 1, 3, 1, 2, 3, 1, 15, 4, 21, 4, 4, 3, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8, 2, 2, - 2, 2, 9, 2, 4, 2, 1, 5, 13, 1, 16, 2, 1, 6, 3, 3, 1, 4, 3, 2, 1, 1, 1, 2, 3, 2, 3, 3, 3, 12, - 4, 5, 3, 3, 1, 3, 3, 1, 6, 1, 40, 13, 1, 3, 1, 23, 1, 16, 3, 8, 1, 3, 1, 3, 8, 2, 1, 3, 1, - 2, 2, 4, 28, 4, 1, 8, 1, 3, 1, 23, 1, 10, 1, 5, 3, 8, 1, 3, 1, 3, 8, 2, 5, 3, 1, 4, 13, 3, - 12, 13, 1, 3, 1, 41, 2, 8, 1, 3, 1, 3, 1, 1, 5, 4, 7, 5, 22, 6, 1, 3, 1, 18, 3, 24, 1, 9, 1, - 1, 2, 7, 8, 6, 1, 1, 1, 8, 18, 2, 13, 58, 5, 7, 6, 1, 51, 2, 1, 1, 1, 5, 1, 24, 1, 1, 1, 19, - 1, 3, 2, 5, 1, 1, 6, 1, 14, 4, 32, 1, 63, 8, 1, 36, 4, 19, 4, 16, 1, 36, 67, 55, 1, 1, 2, 5, - 16, 64, 10, 4, 2, 38, 1, 1, 5, 1, 2, 43, 1, 0, 1, 4, 2, 7, 1, 1, 1, 4, 2, 41, 1, 4, 2, 33, - 1, 4, 2, 7, 1, 1, 1, 4, 2, 15, 1, 57, 1, 4, 2, 67, 37, 16, 16, 86, 2, 6, 3, 0, 2, 17, 1, 26, - 5, 75, 3, 11, 7, 20, 11, 21, 12, 20, 12, 13, 1, 3, 1, 2, 12, 52, 2, 19, 14, 1, 4, 1, 67, 89, - 7, 43, 5, 70, 10, 31, 1, 12, 4, 9, 23, 30, 2, 5, 11, 44, 4, 26, 54, 28, 4, 63, 2, 20, 50, 1, - 23, 2, 11, 3, 49, 52, 1, 15, 1, 8, 51, 42, 2, 4, 10, 44, 1, 11, 14, 55, 22, 3, 10, 36, 2, - 11, 5, 43, 2, 3, 41, 4, 1, 6, 1, 2, 3, 1, 5, 192, 19, 34, 11, 0, 2, 6, 2, 38, 2, 6, 2, 8, 1, - 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116, 1, - 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 11, 2, 4, 5, 5, - 4, 1, 17, 41, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 2, 56, 7, 1, 16, 23, 9, 7, 1, - 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 32, 47, 1, 0, 3, 25, 9, 7, 5, 2, 5, 4, 86, 6, 3, - 1, 90, 1, 4, 5, 43, 1, 94, 17, 32, 48, 16, 0, 0, 64, 0, 67, 46, 2, 0, 3, 16, 10, 2, 20, 47, - 5, 8, 3, 113, 39, 9, 2, 103, 2, 82, 20, 21, 1, 33, 24, 52, 12, 68, 1, 1, 44, 6, 3, 1, 1, 3, - 10, 33, 5, 35, 13, 29, 3, 
51, 1, 12, 15, 1, 16, 16, 10, 5, 1, 55, 9, 14, 18, 23, 3, 69, 1, - 1, 1, 1, 24, 3, 2, 16, 2, 4, 11, 6, 2, 6, 2, 6, 9, 7, 1, 7, 1, 43, 1, 14, 6, 123, 21, 0, 12, - 23, 4, 49, 0, 0, 2, 106, 38, 7, 12, 5, 5, 12, 1, 13, 1, 5, 1, 1, 1, 2, 1, 2, 1, 108, 33, 0, - 18, 64, 2, 54, 40, 12, 116, 5, 1, 135, 36, 26, 6, 26, 11, 89, 3, 6, 2, 6, 2, 6, 2, 3, 35, - 12, 1, 26, 1, 19, 1, 2, 1, 15, 2, 14, 34, 123, 69, 53, 0, 29, 3, 49, 47, 32, 13, 30, 5, 43, - 5, 30, 2, 36, 4, 8, 1, 5, 42, 158, 18, 36, 4, 36, 4, 40, 8, 52, 12, 11, 1, 15, 1, 7, 1, 2, - 1, 11, 1, 15, 1, 7, 1, 2, 3, 52, 12, 0, 9, 22, 10, 8, 24, 6, 1, 42, 1, 9, 69, 6, 2, 1, 1, - 44, 1, 2, 3, 1, 2, 23, 10, 23, 9, 31, 65, 19, 1, 2, 10, 22, 10, 26, 6, 26, 38, 56, 6, 2, 64, - 4, 1, 2, 5, 8, 1, 3, 1, 29, 42, 29, 3, 29, 35, 8, 1, 28, 27, 54, 10, 22, 10, 19, 13, 18, - 110, 73, 55, 51, 13, 51, 13, 40, 34, 28, 3, 1, 5, 23, 250, 42, 1, 2, 3, 2, 16, 6, 50, 3, 3, - 29, 10, 1, 8, 22, 42, 18, 46, 21, 27, 23, 9, 70, 43, 5, 10, 57, 9, 1, 13, 25, 23, 51, 17, 4, - 8, 35, 3, 1, 9, 64, 1, 4, 9, 2, 10, 1, 1, 1, 35, 18, 1, 34, 2, 1, 6, 4, 62, 7, 1, 1, 1, 4, - 1, 15, 1, 10, 7, 57, 23, 4, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8, 2, 2, 2, 2, 3, 1, 6, - 1, 5, 7, 28, 10, 1, 1, 2, 1, 1, 38, 1, 10, 1, 1, 2, 1, 1, 4, 1, 2, 3, 1, 1, 1, 44, 66, 1, 3, - 1, 4, 20, 3, 30, 66, 2, 2, 1, 1, 184, 54, 2, 7, 25, 6, 34, 63, 1, 1, 3, 1, 59, 54, 2, 1, 71, - 27, 2, 14, 21, 7, 185, 57, 103, 64, 31, 8, 2, 1, 2, 8, 1, 2, 1, 30, 1, 2, 2, 2, 2, 4, 93, 8, - 2, 46, 2, 6, 1, 1, 1, 2, 27, 51, 2, 10, 17, 72, 5, 1, 18, 73, 103, 8, 88, 33, 31, 9, 1, 45, - 1, 7, 1, 1, 49, 30, 2, 22, 1, 14, 73, 7, 1, 2, 1, 44, 3, 1, 1, 2, 1, 3, 1, 1, 2, 2, 24, 6, - 1, 2, 1, 37, 1, 2, 1, 4, 1, 1, 23, 44, 0, 23, 9, 17, 1, 41, 3, 3, 111, 1, 79, 0, 102, 111, - 17, 196, 0, 97, 15, 0, 17, 6, 25, 0, 5, 0, 0, 47, 0, 0, 7, 31, 17, 79, 17, 30, 18, 48, 16, - 4, 31, 21, 5, 19, 0, 45, 211, 64, 32, 25, 2, 25, 44, 75, 4, 57, 7, 17, 64, 2, 1, 1, 12, 7, - 9, 0, 41, 32, 97, 115, 0, 4, 1, 7, 1, 2, 1, 0, 15, 1, 29, 3, 2, 1, 14, 4, 8, 0, 0, 107, 5, - 13, 3, 9, 7, 10, 4, 1, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, - 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, - 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 31, 6, 6, 213, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, - 1, 112, 45, 10, 7, 16, 1, 0, 30, 18, 44, 0, 28, 228, 30, 2, 1, 207, 31, 1, 22, 8, 2, 224, 7, - 1, 4, 1, 2, 1, 15, 1, 197, 59, 68, 3, 1, 3, 1, 0, 4, 1, 27, 1, 2, 1, 1, 2, 1, 1, 10, 1, 4, - 1, 1, 1, 1, 6, 1, 4, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, - 1, 2, 4, 1, 7, 1, 4, 1, 4, 1, 1, 1, 10, 1, 17, 5, 3, 1, 5, 1, 17, 0, 26, 6, 26, 6, 26, 0, 0, - 32, 0, 2, 0, 2, 0, 15, 0, 0, 0, 0, 0, 5, 0, 0, + 2, 2, 2, 2, 1, 1, 8, 1, 4, 2, 1, 5, 12, 2, 10, 1, 4, 3, 1, 6, 4, 2, 2, 22, 1, 7, 1, 2, 1, + 2, 1, 2, 4, 5, 4, 2, 2, 2, 4, 1, 7, 4, 1, 1, 17, 6, 11, 3, 1, 9, 1, 3, 1, 22, 1, 7, 1, 2, + 1, 5, 3, 9, 1, 3, 1, 2, 3, 1, 15, 4, 21, 4, 4, 3, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, + 8, 2, 2, 2, 2, 9, 2, 4, 2, 1, 5, 13, 1, 16, 2, 1, 6, 3, 3, 1, 4, 3, 2, 1, 1, 1, 2, 3, 2, 3, + 3, 3, 12, 4, 5, 3, 3, 1, 3, 3, 1, 6, 1, 40, 13, 1, 3, 1, 23, 1, 16, 3, 8, 1, 3, 1, 3, 8, 2, + 1, 3, 1, 2, 2, 4, 28, 4, 1, 8, 1, 3, 1, 23, 1, 10, 1, 5, 3, 8, 1, 3, 1, 3, 8, 2, 5, 3, 1, + 4, 13, 3, 12, 13, 1, 3, 1, 41, 2, 8, 1, 3, 1, 3, 1, 1, 5, 4, 7, 5, 22, 6, 1, 3, 1, 18, 3, + 24, 1, 9, 1, 1, 2, 7, 8, 6, 1, 1, 1, 8, 18, 2, 13, 58, 5, 7, 6, 1, 51, 2, 1, 1, 1, 5, 1, + 24, 1, 1, 1, 19, 1, 3, 2, 5, 1, 1, 6, 1, 14, 4, 32, 1, 63, 8, 1, 36, 4, 
19, 4, 16, 1, 36, + 67, 55, 1, 1, 2, 5, 16, 64, 10, 4, 2, 38, 1, 1, 5, 1, 2, 43, 1, 0, 1, 4, 2, 7, 1, 1, 1, 4, + 2, 41, 1, 4, 2, 33, 1, 4, 2, 7, 1, 1, 1, 4, 2, 15, 1, 57, 1, 4, 2, 67, 37, 16, 16, 86, 2, + 6, 3, 0, 2, 17, 1, 26, 5, 75, 3, 11, 7, 20, 11, 21, 12, 20, 12, 13, 1, 3, 1, 2, 12, 52, 2, + 19, 14, 1, 4, 1, 67, 89, 7, 43, 5, 70, 10, 31, 1, 12, 4, 9, 23, 30, 2, 5, 11, 44, 4, 26, + 54, 28, 4, 63, 2, 20, 50, 1, 23, 2, 11, 3, 49, 52, 1, 15, 1, 8, 51, 42, 2, 4, 10, 44, 1, + 11, 14, 55, 22, 3, 10, 36, 2, 11, 5, 43, 2, 3, 41, 4, 1, 6, 1, 2, 3, 1, 5, 192, 19, 34, 11, + 0, 2, 6, 2, 38, 2, 6, 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, + 2, 6, 4, 13, 5, 3, 1, 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, + 1, 1, 1, 1, 4, 1, 11, 2, 4, 5, 5, 4, 1, 17, 41, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, + 1, 2, 56, 7, 1, 16, 23, 9, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 32, 47, 1, 0, 3, + 25, 9, 7, 5, 2, 5, 4, 86, 6, 3, 1, 90, 1, 4, 5, 43, 1, 94, 17, 32, 48, 16, 0, 0, 64, 0, 67, + 46, 2, 0, 3, 16, 10, 2, 20, 47, 5, 8, 3, 113, 39, 9, 2, 103, 2, 82, 20, 21, 1, 33, 24, 52, + 12, 68, 1, 1, 44, 6, 3, 1, 1, 3, 10, 33, 5, 35, 13, 29, 3, 51, 1, 12, 15, 1, 16, 16, 10, 5, + 1, 55, 9, 14, 18, 23, 3, 69, 1, 1, 1, 1, 24, 3, 2, 16, 2, 4, 11, 6, 2, 6, 2, 6, 9, 7, 1, 7, + 1, 43, 1, 14, 6, 123, 21, 0, 12, 23, 4, 49, 0, 0, 2, 106, 38, 7, 12, 5, 5, 12, 1, 13, 1, 5, + 1, 1, 1, 2, 1, 2, 1, 108, 33, 0, 18, 64, 2, 54, 40, 12, 116, 5, 1, 135, 36, 26, 6, 26, 11, + 89, 3, 6, 2, 6, 2, 6, 2, 3, 35, 12, 1, 26, 1, 19, 1, 2, 1, 15, 2, 14, 34, 123, 69, 53, 0, + 29, 3, 49, 47, 32, 13, 30, 5, 43, 5, 30, 2, 36, 4, 8, 1, 5, 42, 158, 18, 36, 4, 36, 4, 40, + 8, 52, 12, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 3, 52, 12, 0, 9, 22, 10, 8, 24, + 6, 1, 42, 1, 9, 69, 6, 2, 1, 1, 44, 1, 2, 3, 1, 2, 23, 10, 23, 9, 31, 65, 19, 1, 2, 10, 22, + 10, 26, 6, 26, 38, 56, 6, 2, 64, 4, 1, 2, 5, 8, 1, 3, 1, 29, 42, 29, 3, 29, 35, 8, 1, 28, + 27, 54, 10, 22, 10, 19, 13, 18, 110, 73, 55, 51, 13, 51, 13, 40, 34, 28, 3, 1, 5, 23, 250, + 42, 1, 2, 3, 2, 16, 6, 50, 3, 3, 29, 10, 1, 8, 22, 42, 18, 46, 21, 27, 23, 9, 70, 43, 5, + 10, 57, 9, 1, 13, 25, 23, 51, 17, 4, 8, 35, 3, 1, 9, 64, 1, 4, 9, 2, 10, 1, 1, 1, 35, 18, + 1, 34, 2, 1, 6, 4, 62, 7, 1, 1, 1, 4, 1, 15, 1, 10, 7, 57, 23, 4, 1, 8, 2, 2, 2, 22, 1, 7, + 1, 2, 1, 5, 3, 8, 2, 2, 2, 2, 3, 1, 6, 1, 5, 7, 28, 10, 1, 1, 2, 1, 1, 38, 1, 10, 1, 1, 2, + 1, 1, 4, 1, 2, 3, 1, 1, 1, 44, 66, 1, 3, 1, 4, 20, 3, 30, 66, 2, 2, 1, 1, 184, 54, 2, 7, + 25, 6, 34, 63, 1, 1, 3, 1, 59, 54, 2, 1, 71, 27, 2, 14, 21, 7, 185, 57, 103, 64, 31, 8, 2, + 1, 2, 8, 1, 2, 1, 30, 1, 2, 2, 2, 2, 4, 93, 8, 2, 46, 2, 6, 1, 1, 1, 2, 27, 51, 2, 10, 17, + 72, 5, 1, 18, 73, 103, 8, 88, 33, 31, 9, 1, 45, 1, 7, 1, 1, 49, 30, 2, 22, 1, 14, 73, 7, 1, + 2, 1, 44, 3, 1, 1, 2, 1, 3, 1, 1, 2, 2, 24, 6, 1, 2, 1, 37, 1, 2, 1, 4, 1, 1, 23, 44, 0, + 23, 9, 17, 1, 41, 3, 3, 111, 1, 79, 0, 102, 111, 17, 196, 0, 97, 15, 0, 17, 6, 25, 0, 5, 0, + 0, 47, 0, 0, 7, 31, 17, 79, 17, 30, 18, 48, 16, 4, 31, 21, 5, 19, 0, 45, 211, 64, 32, 25, + 2, 25, 44, 75, 4, 57, 7, 17, 64, 2, 1, 1, 12, 7, 9, 0, 41, 32, 97, 115, 0, 4, 1, 7, 1, 2, + 1, 0, 15, 1, 29, 3, 2, 1, 14, 4, 8, 0, 0, 107, 5, 13, 3, 9, 7, 10, 4, 1, 0, 85, 1, 71, 1, + 2, 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, + 3, 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, + 31, 6, 6, 213, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 112, 45, 10, 7, 16, 1, 0, 30, 18, + 44, 0, 
28, 228, 30, 2, 1, 207, 31, 1, 22, 8, 2, 224, 7, 1, 4, 1, 2, 1, 15, 1, 197, 59, 68, + 3, 1, 3, 1, 0, 4, 1, 27, 1, 2, 1, 1, 2, 1, 1, 10, 1, 4, 1, 1, 1, 1, 6, 1, 4, 1, 1, 1, 1, 1, + 1, 3, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 4, 1, 7, 1, 4, 1, 4, 1, 1, + 1, 10, 1, 17, 5, 3, 1, 5, 1, 17, 0, 26, 6, 26, 6, 26, 0, 0, 32, 0, 2, 0, 2, 0, 15, 0, 0, 0, + 0, 0, 5, 0, 0, ]; #[inline] pub fn lookup(c: char) -> bool { @@ -126,65 +151,82 @@ pub mod alphabetic { } } -#[rustfmt::skip] pub mod case_ignorable { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 36] = [ - ShortOffsetRunHeader::new(0, 688), ShortOffsetRunHeader::new(11, 4957), - ShortOffsetRunHeader::new(263, 5906), ShortOffsetRunHeader::new(265, 8125), - ShortOffsetRunHeader::new(377, 11388), ShortOffsetRunHeader::new(411, 12293), - ShortOffsetRunHeader::new(423, 40981), ShortOffsetRunHeader::new(435, 42232), - ShortOffsetRunHeader::new(437, 42508), ShortOffsetRunHeader::new(439, 64286), - ShortOffsetRunHeader::new(535, 65024), ShortOffsetRunHeader::new(539, 66045), - ShortOffsetRunHeader::new(569, 67456), ShortOffsetRunHeader::new(575, 68097), - ShortOffsetRunHeader::new(581, 68900), ShortOffsetRunHeader::new(593, 69291), - ShortOffsetRunHeader::new(601, 71727), ShortOffsetRunHeader::new(727, 71995), - ShortOffsetRunHeader::new(731, 73459), ShortOffsetRunHeader::new(797, 78896), - ShortOffsetRunHeader::new(809, 90398), ShortOffsetRunHeader::new(813, 92912), - ShortOffsetRunHeader::new(817, 93504), ShortOffsetRunHeader::new(823, 94031), - ShortOffsetRunHeader::new(827, 110576), ShortOffsetRunHeader::new(837, 113821), - ShortOffsetRunHeader::new(843, 118528), ShortOffsetRunHeader::new(847, 119143), - ShortOffsetRunHeader::new(851, 121344), ShortOffsetRunHeader::new(861, 122880), - ShortOffsetRunHeader::new(873, 123566), ShortOffsetRunHeader::new(889, 124139), - ShortOffsetRunHeader::new(893, 125136), ShortOffsetRunHeader::new(907, 127995), - ShortOffsetRunHeader::new(911, 917505), ShortOffsetRunHeader::new(913, 2032112), + ShortOffsetRunHeader::new(0, 688), + ShortOffsetRunHeader::new(11, 4957), + ShortOffsetRunHeader::new(263, 5906), + ShortOffsetRunHeader::new(265, 8125), + ShortOffsetRunHeader::new(377, 11388), + ShortOffsetRunHeader::new(411, 12293), + ShortOffsetRunHeader::new(423, 40981), + ShortOffsetRunHeader::new(435, 42232), + ShortOffsetRunHeader::new(437, 42508), + ShortOffsetRunHeader::new(439, 64286), + ShortOffsetRunHeader::new(535, 65024), + ShortOffsetRunHeader::new(539, 66045), + ShortOffsetRunHeader::new(569, 67456), + ShortOffsetRunHeader::new(575, 68097), + ShortOffsetRunHeader::new(581, 68900), + ShortOffsetRunHeader::new(593, 69291), + ShortOffsetRunHeader::new(601, 71727), + ShortOffsetRunHeader::new(727, 71995), + ShortOffsetRunHeader::new(731, 73459), + ShortOffsetRunHeader::new(797, 78896), + ShortOffsetRunHeader::new(809, 90398), + ShortOffsetRunHeader::new(813, 92912), + ShortOffsetRunHeader::new(817, 93504), + ShortOffsetRunHeader::new(823, 94031), + ShortOffsetRunHeader::new(827, 110576), + ShortOffsetRunHeader::new(837, 113821), + ShortOffsetRunHeader::new(843, 118528), + ShortOffsetRunHeader::new(847, 119143), + ShortOffsetRunHeader::new(851, 121344), + ShortOffsetRunHeader::new(861, 122880), + ShortOffsetRunHeader::new(873, 123566), + ShortOffsetRunHeader::new(889, 124139), + ShortOffsetRunHeader::new(893, 125136), + ShortOffsetRunHeader::new(907, 127995), + ShortOffsetRunHeader::new(911, 917505), + ShortOffsetRunHeader::new(913, 2032112), ]; static OFFSETS: 
[u8; 919] = [ 168, 1, 4, 1, 1, 1, 4, 1, 2, 2, 0, 192, 4, 2, 4, 1, 9, 2, 1, 1, 251, 7, 207, 1, 5, 1, 49, - 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35, 1, 10, 21, 16, 1, 101, 8, 1, 10, - 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24, 24, 43, 3, 44, 1, 7, 2, 5, 9, 41, - 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1, 58, 1, 4, 4, 8, 1, 20, 2, 26, 1, 2, - 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, - 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1, 61, 1, 12, 1, 50, 1, 3, 1, 55, 1, 1, 3, - 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1, 5, 2, 20, 2, 28, 2, 57, 2, 4, 4, 8, 1, - 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9, 98, 1, 2, 9, 9, 1, 1, 7, 73, 2, 27, 1, - 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, - 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3, 29, 2, 30, 2, 30, 2, 64, 2, 1, 7, 8, 1, - 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118, 3, 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58, - 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 46, 2, 12, 20, 4, 48, 1, 1, 5, 1, 1, 5, 1, - 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3, 58, 8, 2, 2, 64, 6, 82, 3, 1, 13, - 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1, 3, 11, 3, 13, 3, 13, 3, 13, 2, 12, - 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1, 16, 13, 51, 33, 0, 2, 113, 3, 125, 1, - 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6, 93, 3, 0, 1, 0, 6, 0, 1, 98, 4, 1, 10, - 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 102, 4, 3, 2, 8, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, - 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4, 2, 2, 17, 1, 21, 2, 66, 6, 2, 2, 2, 2, - 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 27, 1, 14, 2, 5, 2, 1, 1, 100, 5, - 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3, 1, 12, 16, 34, 1, 2, 1, 169, 1, 7, 1, 6, - 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3, 0, 1, 226, 1, 149, 5, 0, 6, 1, 42, 1, 9, - 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 38, 1, 26, 5, 1, 1, 0, 2, 24, 1, 52, 6, 70, 11, - 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, 1, 4, 1, 10, 1, 50, 3, 36, 5, 1, - 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, - 2, 3, 1, 37, 7, 3, 5, 70, 6, 13, 1, 1, 1, 1, 1, 14, 2, 85, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, - 1, 4, 2, 1, 2, 238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, - 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 0, 2, 1, 1, 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, - 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, 3, 1, 1, 201, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, - 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, 1, 1, 1, 65, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, - 1, 23, 1, 0, 17, 6, 15, 0, 12, 3, 3, 0, 5, 59, 7, 9, 4, 0, 3, 40, 2, 0, 1, 63, 17, 64, 2, 1, - 2, 13, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3, 0, - 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160, 14, - 0, 1, 61, 4, 0, 5, 254, 2, 243, 1, 2, 1, 7, 2, 5, 1, 9, 1, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, - 128, 240, 0, + 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35, 1, 10, 21, 16, 1, 101, 8, 1, + 10, 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24, 24, 43, 3, 44, 1, 7, 2, 5, + 9, 41, 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1, 58, 1, 4, 4, 8, 1, 20, 2, + 26, 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, + 20, 2, 22, 6, 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 
2, 30, 1, 61, 1, 12, 1, 50, 1, 3, 1, + 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1, 5, 2, 20, 2, 28, 2, 57, 2, + 4, 4, 8, 1, 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9, 98, 1, 2, 9, 9, 1, 1, 7, + 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, + 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3, 29, 2, 30, 2, 30, 2, 64, + 2, 1, 7, 8, 1, 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118, 3, 4, 2, 9, 1, 6, 3, + 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 46, 2, 12, 20, 4, 48, + 1, 1, 5, 1, 1, 5, 1, 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3, 58, 8, 2, 2, + 64, 6, 82, 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1, 3, 11, 3, 13, + 3, 13, 3, 13, 2, 12, 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1, 16, 13, 51, 33, + 0, 2, 113, 3, 125, 1, 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6, 93, 3, 0, 1, 0, + 6, 0, 1, 98, 4, 1, 10, 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 102, 4, 3, 2, 8, 1, 3, 1, + 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4, 2, 2, 17, 1, + 21, 2, 66, 6, 2, 2, 2, 2, 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 27, 1, + 14, 2, 5, 2, 1, 1, 100, 5, 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3, 1, 12, 16, + 34, 1, 2, 1, 169, 1, 7, 1, 6, 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3, 0, 1, 226, + 1, 149, 5, 0, 6, 1, 42, 1, 9, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 38, 1, 26, 5, 1, + 1, 0, 2, 24, 1, 52, 6, 70, 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, + 1, 4, 1, 10, 1, 50, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, + 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 3, 1, 37, 7, 3, 5, 70, 6, 13, 1, 1, 1, 1, 1, 14, 2, + 85, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, 1, 4, 2, 1, 2, 238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, + 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 0, 2, 1, 1, 4, 1, + 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, 3, 1, + 1, 201, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, + 1, 1, 1, 65, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, 1, 23, 1, 0, 17, 6, 15, 0, 12, 3, 3, 0, 5, 59, + 7, 9, 4, 0, 3, 40, 2, 0, 1, 63, 17, 64, 2, 1, 2, 13, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, + 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3, 0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, + 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160, 14, 0, 1, 61, 4, 0, 5, 254, 2, 243, 1, 2, 1, 7, + 2, 5, 1, 9, 1, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0, ]; #[inline] pub fn lookup(c: char) -> bool { @@ -208,36 +250,46 @@ pub mod case_ignorable { } } -#[rustfmt::skip] pub mod cased { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 22] = [ - ShortOffsetRunHeader::new(0, 4256), ShortOffsetRunHeader::new(51, 5024), - ShortOffsetRunHeader::new(61, 7296), ShortOffsetRunHeader::new(65, 7958), - ShortOffsetRunHeader::new(74, 9398), ShortOffsetRunHeader::new(149, 11264), - ShortOffsetRunHeader::new(151, 42560), ShortOffsetRunHeader::new(163, 43824), - ShortOffsetRunHeader::new(177, 64256), ShortOffsetRunHeader::new(183, 65313), - ShortOffsetRunHeader::new(187, 66560), ShortOffsetRunHeader::new(191, 67456), - ShortOffsetRunHeader::new(213, 68736), ShortOffsetRunHeader::new(221, 71840), - ShortOffsetRunHeader::new(229, 93760), ShortOffsetRunHeader::new(231, 119808), - ShortOffsetRunHeader::new(237, 120486), ShortOffsetRunHeader::new(274, 122624), - 
ShortOffsetRunHeader::new(297, 122928), ShortOffsetRunHeader::new(303, 125184), - ShortOffsetRunHeader::new(305, 127280), ShortOffsetRunHeader::new(307, 1241482), + ShortOffsetRunHeader::new(0, 4256), + ShortOffsetRunHeader::new(51, 5024), + ShortOffsetRunHeader::new(61, 7296), + ShortOffsetRunHeader::new(65, 7958), + ShortOffsetRunHeader::new(74, 9398), + ShortOffsetRunHeader::new(149, 11264), + ShortOffsetRunHeader::new(151, 42560), + ShortOffsetRunHeader::new(163, 43824), + ShortOffsetRunHeader::new(177, 64256), + ShortOffsetRunHeader::new(183, 65313), + ShortOffsetRunHeader::new(187, 66560), + ShortOffsetRunHeader::new(191, 67456), + ShortOffsetRunHeader::new(213, 68736), + ShortOffsetRunHeader::new(221, 71840), + ShortOffsetRunHeader::new(229, 93760), + ShortOffsetRunHeader::new(231, 119808), + ShortOffsetRunHeader::new(237, 120486), + ShortOffsetRunHeader::new(274, 122624), + ShortOffsetRunHeader::new(297, 122928), + ShortOffsetRunHeader::new(303, 125184), + ShortOffsetRunHeader::new(305, 127280), + ShortOffsetRunHeader::new(307, 1241482), ]; static OFFSETS: [u8; 313] = [ 170, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 2, 35, 7, 2, 30, 5, 96, 1, 42, 4, - 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9, 41, 0, 38, 1, 1, - 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2, 38, 2, 6, 2, 8, - 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116, - 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 6, 4, 1, 2, 4, - 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 0, 46, 18, 30, 132, - 102, 3, 4, 1, 77, 20, 6, 1, 3, 0, 43, 1, 14, 6, 80, 0, 7, 12, 5, 0, 26, 6, 26, 0, 80, 96, - 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 0, 1, 2, 3, 1, 42, 1, 9, 0, - 51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 32, 25, 2, 25, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2, - 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25, - 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6, 0, - 62, 0, 68, 0, 26, 6, 26, 6, 26, 0, + 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9, 41, 0, 38, 1, + 1, 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2, 38, 2, 6, + 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, + 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 6, 4, + 1, 2, 4, 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 0, 46, + 18, 30, 132, 102, 3, 4, 1, 77, 20, 6, 1, 3, 0, 43, 1, 14, 6, 80, 0, 7, 12, 5, 0, 26, 6, 26, + 0, 80, 96, 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 0, 1, 2, 3, 1, + 42, 1, 9, 0, 51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 32, 25, 2, 25, 0, 85, 1, 71, 1, 2, + 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, + 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, + 1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0, ]; #[inline] pub fn lookup(c: char) -> bool { @@ -261,58 +313,73 @@ pub mod cased { } } -#[rustfmt::skip] pub mod grapheme_extend { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 33] = [ - ShortOffsetRunHeader::new(0, 768), ShortOffsetRunHeader::new(1, 1155), - ShortOffsetRunHeader::new(3, 1425), ShortOffsetRunHeader::new(5, 4957), - ShortOffsetRunHeader::new(249, 5906), 
ShortOffsetRunHeader::new(251, 8204), - ShortOffsetRunHeader::new(347, 11503), ShortOffsetRunHeader::new(351, 12330), - ShortOffsetRunHeader::new(357, 42607), ShortOffsetRunHeader::new(361, 43010), - ShortOffsetRunHeader::new(369, 64286), ShortOffsetRunHeader::new(435, 65024), - ShortOffsetRunHeader::new(437, 65438), ShortOffsetRunHeader::new(441, 66045), - ShortOffsetRunHeader::new(443, 68097), ShortOffsetRunHeader::new(449, 68900), - ShortOffsetRunHeader::new(461, 69291), ShortOffsetRunHeader::new(465, 71727), - ShortOffsetRunHeader::new(601, 73459), ShortOffsetRunHeader::new(669, 78912), - ShortOffsetRunHeader::new(679, 90398), ShortOffsetRunHeader::new(683, 92912), - ShortOffsetRunHeader::new(687, 94031), ShortOffsetRunHeader::new(691, 113821), - ShortOffsetRunHeader::new(699, 118528), ShortOffsetRunHeader::new(701, 119141), - ShortOffsetRunHeader::new(705, 121344), ShortOffsetRunHeader::new(717, 122880), - ShortOffsetRunHeader::new(729, 123566), ShortOffsetRunHeader::new(743, 124140), - ShortOffsetRunHeader::new(747, 125136), ShortOffsetRunHeader::new(759, 917536), + ShortOffsetRunHeader::new(0, 768), + ShortOffsetRunHeader::new(1, 1155), + ShortOffsetRunHeader::new(3, 1425), + ShortOffsetRunHeader::new(5, 4957), + ShortOffsetRunHeader::new(249, 5906), + ShortOffsetRunHeader::new(251, 8204), + ShortOffsetRunHeader::new(347, 11503), + ShortOffsetRunHeader::new(351, 12330), + ShortOffsetRunHeader::new(357, 42607), + ShortOffsetRunHeader::new(361, 43010), + ShortOffsetRunHeader::new(369, 64286), + ShortOffsetRunHeader::new(435, 65024), + ShortOffsetRunHeader::new(437, 65438), + ShortOffsetRunHeader::new(441, 66045), + ShortOffsetRunHeader::new(443, 68097), + ShortOffsetRunHeader::new(449, 68900), + ShortOffsetRunHeader::new(461, 69291), + ShortOffsetRunHeader::new(465, 71727), + ShortOffsetRunHeader::new(601, 73459), + ShortOffsetRunHeader::new(669, 78912), + ShortOffsetRunHeader::new(679, 90398), + ShortOffsetRunHeader::new(683, 92912), + ShortOffsetRunHeader::new(687, 94031), + ShortOffsetRunHeader::new(691, 113821), + ShortOffsetRunHeader::new(699, 118528), + ShortOffsetRunHeader::new(701, 119141), + ShortOffsetRunHeader::new(705, 121344), + ShortOffsetRunHeader::new(717, 122880), + ShortOffsetRunHeader::new(729, 123566), + ShortOffsetRunHeader::new(743, 124140), + ShortOffsetRunHeader::new(747, 125136), + ShortOffsetRunHeader::new(759, 917536), ShortOffsetRunHeader::new(763, 2032112), ]; static OFFSETS: [u8; 767] = [ 0, 112, 0, 7, 0, 45, 1, 1, 1, 2, 1, 2, 1, 1, 72, 11, 48, 21, 16, 1, 101, 7, 2, 6, 2, 2, 1, - 4, 35, 1, 30, 27, 91, 11, 58, 9, 9, 1, 24, 4, 1, 9, 1, 3, 1, 5, 43, 3, 59, 9, 42, 24, 1, 32, - 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 29, 1, 58, 1, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 26, 1, 2, - 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, - 1, 1, 58, 1, 1, 2, 1, 4, 8, 1, 7, 3, 10, 2, 30, 1, 59, 1, 1, 1, 12, 1, 9, 1, 40, 1, 3, 1, - 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 2, 1, 1, 3, 3, 1, 4, 7, 2, 11, 2, 28, - 2, 57, 2, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 29, 1, 72, 1, 4, 1, 2, 3, 1, 1, 8, 1, 81, 1, 2, 7, - 12, 8, 98, 1, 2, 9, 11, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, - 102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 0, 3, 0, 4, 28, 3, - 29, 2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 9, 1, 45, 3, 1, 1, 117, 2, 34, 1, 118, 3, 4, 2, 9, - 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 48, 46, 2, 12, 20, 4, 48, - 10, 4, 3, 38, 9, 12, 2, 32, 4, 2, 6, 56, 1, 1, 2, 3, 
1, 1, 5, 56, 8, 2, 2, 152, 3, 1, 13, 1, - 7, 4, 1, 6, 1, 3, 2, 198, 64, 0, 1, 195, 33, 0, 3, 141, 1, 96, 32, 0, 6, 105, 2, 0, 4, 1, - 10, 32, 2, 80, 2, 0, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 1, 1, - 44, 3, 48, 1, 2, 4, 2, 2, 2, 1, 36, 1, 67, 6, 2, 2, 2, 2, 12, 1, 8, 1, 47, 1, 51, 1, 1, 3, - 2, 2, 5, 2, 1, 1, 42, 2, 8, 1, 238, 1, 2, 1, 4, 1, 0, 1, 0, 16, 16, 16, 0, 2, 0, 1, 226, 1, - 149, 5, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 65, 5, 0, 2, 77, 6, 70, 11, 49, 4, 123, - 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 7, 1, 61, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, - 1, 1, 8, 4, 2, 1, 95, 3, 2, 4, 6, 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 1, 1, 1, 1, 12, 1, 9, - 1, 14, 7, 3, 5, 67, 1, 2, 6, 1, 1, 2, 1, 1, 3, 4, 3, 1, 1, 14, 2, 85, 8, 2, 3, 1, 1, 23, 1, - 81, 1, 2, 6, 1, 1, 2, 1, 1, 2, 1, 2, 235, 1, 2, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, - 106, 1, 1, 1, 2, 8, 101, 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 245, 1, 10, 4, 4, 1, 144, 4, 2, 2, - 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, 3, 1, 1, 201, 7, 1, 6, - 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, 1, 1, 1, 0, 2, 11, - 2, 52, 5, 5, 3, 23, 1, 0, 1, 6, 15, 0, 12, 3, 3, 0, 5, 59, 7, 0, 1, 63, 4, 81, 1, 11, 2, 0, - 2, 0, 46, 2, 23, 0, 5, 3, 6, 8, 8, 2, 7, 30, 4, 148, 3, 0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, - 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 100, 1, 160, 7, 0, 1, 61, 4, 0, 4, 254, 2, 243, 1, 2, 1, - 7, 2, 5, 1, 0, 7, 109, 7, 0, 96, 128, 240, 0, + 4, 35, 1, 30, 27, 91, 11, 58, 9, 9, 1, 24, 4, 1, 9, 1, 3, 1, 5, 43, 3, 59, 9, 42, 24, 1, + 32, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 29, 1, 58, 1, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 26, + 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, + 22, 6, 1, 1, 58, 1, 1, 2, 1, 4, 8, 1, 7, 3, 10, 2, 30, 1, 59, 1, 1, 1, 12, 1, 9, 1, 40, 1, + 3, 1, 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 2, 1, 1, 3, 3, 1, 4, 7, 2, 11, + 2, 28, 2, 57, 2, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 29, 1, 72, 1, 4, 1, 2, 3, 1, 1, 8, 1, 81, + 1, 2, 7, 12, 8, 98, 1, 2, 9, 11, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, + 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 0, 3, 0, + 4, 28, 3, 29, 2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 9, 1, 45, 3, 1, 1, 117, 2, 34, 1, 118, 3, + 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 48, 46, 2, 12, + 20, 4, 48, 10, 4, 3, 38, 9, 12, 2, 32, 4, 2, 6, 56, 1, 1, 2, 3, 1, 1, 5, 56, 8, 2, 2, 152, + 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 198, 64, 0, 1, 195, 33, 0, 3, 141, 1, 96, 32, 0, 6, 105, + 2, 0, 4, 1, 10, 32, 2, 80, 2, 0, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, + 25, 11, 1, 1, 44, 3, 48, 1, 2, 4, 2, 2, 2, 1, 36, 1, 67, 6, 2, 2, 2, 2, 12, 1, 8, 1, 47, 1, + 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 42, 2, 8, 1, 238, 1, 2, 1, 4, 1, 0, 1, 0, 16, 16, 16, 0, 2, + 0, 1, 226, 1, 149, 5, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 65, 5, 0, 2, 77, 6, 70, + 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 7, 1, 61, 3, 36, 5, 1, 8, 62, + 1, 12, 2, 52, 9, 1, 1, 8, 4, 2, 1, 95, 3, 2, 4, 6, 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 1, + 1, 1, 1, 12, 1, 9, 1, 14, 7, 3, 5, 67, 1, 2, 6, 1, 1, 2, 1, 1, 3, 4, 3, 1, 1, 14, 2, 85, 8, + 2, 3, 1, 1, 23, 1, 81, 1, 2, 6, 1, 1, 2, 1, 1, 2, 1, 2, 235, 1, 2, 4, 6, 2, 1, 2, 27, 2, + 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 8, 101, 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 245, 1, 10, 4, + 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, + 3, 1, 1, 
201, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, + 2, 3, 1, 1, 1, 0, 2, 11, 2, 52, 5, 5, 3, 23, 1, 0, 1, 6, 15, 0, 12, 3, 3, 0, 5, 59, 7, 0, + 1, 63, 4, 81, 1, 11, 2, 0, 2, 0, 46, 2, 23, 0, 5, 3, 6, 8, 8, 2, 7, 30, 4, 148, 3, 0, 55, + 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 100, 1, 160, 7, 0, 1, 61, + 4, 0, 4, 254, 2, 243, 1, 2, 1, 7, 2, 5, 1, 0, 7, 109, 7, 0, 96, 128, 240, 0, ]; #[inline] pub fn lookup(c: char) -> bool { @@ -336,14 +403,13 @@ pub mod grapheme_extend { } } -#[rustfmt::skip] pub mod lowercase { static BITSET_CHUNKS_MAP: [u8; 123] = [ 12, 17, 0, 0, 9, 0, 0, 13, 14, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, - 3, 18, 0, 7, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, + 0, 3, 18, 0, 7, ]; static BITSET_INDEX_CHUNKS: [[u8; 16]; 20] = [ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], @@ -427,65 +493,105 @@ pub mod lowercase { 0b1110101111000000000000000000000000001111111111111111111111111100, ]; static BITSET_MAPPING: [(u8, u8); 22] = [ - (0, 64), (1, 184), (1, 182), (1, 179), (1, 172), (1, 168), (1, 161), (1, 146), (1, 144), - (1, 140), (1, 136), (1, 132), (2, 146), (2, 144), (2, 83), (3, 93), (3, 147), (3, 133), - (4, 12), (4, 6), (5, 187), (6, 78), + (0, 64), + (1, 184), + (1, 182), + (1, 179), + (1, 172), + (1, 168), + (1, 161), + (1, 146), + (1, 144), + (1, 140), + (1, 136), + (1, 132), + (2, 146), + (2, 144), + (2, 83), + (3, 93), + (3, 147), + (3, 133), + (4, 12), + (4, 6), + (5, 187), + (6, 78), ]; pub const fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); - (c as u32) >= 0xaa && - super::bitset_search( - c as u32, - &BITSET_CHUNKS_MAP, - &BITSET_INDEX_CHUNKS, - &BITSET_CANONICAL, - &BITSET_MAPPING, - ) + (c as u32) >= 0xaa + && super::bitset_search( + c as u32, + &BITSET_CHUNKS_MAP, + &BITSET_INDEX_CHUNKS, + &BITSET_CANONICAL, + &BITSET_MAPPING, + ) } } -#[rustfmt::skip] pub mod n { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 43] = [ - ShortOffsetRunHeader::new(0, 1632), ShortOffsetRunHeader::new(7, 2406), - ShortOffsetRunHeader::new(13, 4160), ShortOffsetRunHeader::new(47, 4969), - ShortOffsetRunHeader::new(51, 5870), ShortOffsetRunHeader::new(53, 6470), - ShortOffsetRunHeader::new(61, 8304), ShortOffsetRunHeader::new(77, 9312), - ShortOffsetRunHeader::new(87, 10102), ShortOffsetRunHeader::new(91, 11517), - ShortOffsetRunHeader::new(93, 12295), ShortOffsetRunHeader::new(95, 12690), - ShortOffsetRunHeader::new(101, 42528), ShortOffsetRunHeader::new(113, 43056), - ShortOffsetRunHeader::new(117, 44016), ShortOffsetRunHeader::new(129, 65296), - ShortOffsetRunHeader::new(131, 65799), ShortOffsetRunHeader::new(133, 66273), - ShortOffsetRunHeader::new(139, 67672), ShortOffsetRunHeader::new(151, 68858), - ShortOffsetRunHeader::new(181, 69216), ShortOffsetRunHeader::new(187, 70736), - ShortOffsetRunHeader::new(207, 71248), ShortOffsetRunHeader::new(211, 71904), - ShortOffsetRunHeader::new(219, 72688), ShortOffsetRunHeader::new(223, 73552), - 
ShortOffsetRunHeader::new(233, 74752), ShortOffsetRunHeader::new(237, 90416), - ShortOffsetRunHeader::new(239, 92768), ShortOffsetRunHeader::new(241, 93552), - ShortOffsetRunHeader::new(249, 93824), ShortOffsetRunHeader::new(251, 94196), - ShortOffsetRunHeader::new(253, 118000), ShortOffsetRunHeader::new(255, 119488), - ShortOffsetRunHeader::new(257, 120782), ShortOffsetRunHeader::new(263, 123200), - ShortOffsetRunHeader::new(265, 123632), ShortOffsetRunHeader::new(267, 124144), - ShortOffsetRunHeader::new(269, 125127), ShortOffsetRunHeader::new(273, 126065), - ShortOffsetRunHeader::new(277, 127232), ShortOffsetRunHeader::new(287, 130032), + ShortOffsetRunHeader::new(0, 1632), + ShortOffsetRunHeader::new(7, 2406), + ShortOffsetRunHeader::new(13, 4160), + ShortOffsetRunHeader::new(47, 4969), + ShortOffsetRunHeader::new(51, 5870), + ShortOffsetRunHeader::new(53, 6470), + ShortOffsetRunHeader::new(61, 8304), + ShortOffsetRunHeader::new(77, 9312), + ShortOffsetRunHeader::new(87, 10102), + ShortOffsetRunHeader::new(91, 11517), + ShortOffsetRunHeader::new(93, 12295), + ShortOffsetRunHeader::new(95, 12690), + ShortOffsetRunHeader::new(101, 42528), + ShortOffsetRunHeader::new(113, 43056), + ShortOffsetRunHeader::new(117, 44016), + ShortOffsetRunHeader::new(129, 65296), + ShortOffsetRunHeader::new(131, 65799), + ShortOffsetRunHeader::new(133, 66273), + ShortOffsetRunHeader::new(139, 67672), + ShortOffsetRunHeader::new(151, 68858), + ShortOffsetRunHeader::new(181, 69216), + ShortOffsetRunHeader::new(187, 70736), + ShortOffsetRunHeader::new(207, 71248), + ShortOffsetRunHeader::new(211, 71904), + ShortOffsetRunHeader::new(219, 72688), + ShortOffsetRunHeader::new(223, 73552), + ShortOffsetRunHeader::new(233, 74752), + ShortOffsetRunHeader::new(237, 90416), + ShortOffsetRunHeader::new(239, 92768), + ShortOffsetRunHeader::new(241, 93552), + ShortOffsetRunHeader::new(249, 93824), + ShortOffsetRunHeader::new(251, 94196), + ShortOffsetRunHeader::new(253, 118000), + ShortOffsetRunHeader::new(255, 119488), + ShortOffsetRunHeader::new(257, 120782), + ShortOffsetRunHeader::new(263, 123200), + ShortOffsetRunHeader::new(265, 123632), + ShortOffsetRunHeader::new(267, 124144), + ShortOffsetRunHeader::new(269, 125127), + ShortOffsetRunHeader::new(273, 126065), + ShortOffsetRunHeader::new(277, 127232), + ShortOffsetRunHeader::new(287, 130032), ShortOffsetRunHeader::new(289, 1244154), ]; static OFFSETS: [u8; 291] = [ 178, 2, 5, 1, 2, 3, 0, 10, 134, 10, 198, 10, 0, 10, 118, 10, 4, 6, 108, 10, 118, 10, 118, 10, 2, 6, 110, 13, 115, 10, 8, 7, 103, 10, 104, 7, 7, 19, 109, 10, 96, 10, 118, 10, 70, 20, - 0, 10, 70, 10, 0, 20, 0, 3, 239, 10, 6, 10, 22, 10, 0, 10, 128, 11, 165, 10, 6, 10, 182, 10, - 86, 10, 134, 10, 6, 10, 0, 1, 3, 6, 6, 10, 198, 51, 2, 5, 0, 60, 78, 22, 0, 30, 0, 1, 0, 1, - 25, 9, 14, 3, 0, 4, 138, 10, 30, 8, 1, 15, 32, 10, 39, 15, 0, 10, 188, 10, 0, 6, 154, 10, - 38, 10, 198, 10, 22, 10, 86, 10, 0, 10, 0, 10, 0, 45, 12, 57, 17, 2, 0, 27, 36, 4, 29, 1, 8, - 1, 134, 5, 202, 10, 0, 8, 25, 7, 39, 9, 75, 5, 22, 6, 160, 2, 2, 16, 2, 46, 64, 9, 52, 2, - 30, 3, 75, 5, 104, 8, 24, 8, 41, 7, 0, 6, 48, 10, 6, 10, 0, 31, 158, 10, 42, 4, 112, 7, 134, - 30, 128, 10, 60, 10, 144, 10, 7, 20, 251, 10, 0, 10, 118, 10, 0, 10, 102, 10, 6, 20, 76, 12, - 0, 19, 93, 10, 0, 10, 86, 29, 227, 10, 70, 10, 54, 10, 0, 10, 102, 21, 0, 111, 0, 10, 0, 10, - 86, 10, 134, 10, 1, 7, 0, 10, 0, 23, 0, 3, 0, 10, 0, 20, 12, 20, 108, 25, 0, 50, 0, 10, 0, - 10, 0, 10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0, + 0, 
10, 70, 10, 0, 20, 0, 3, 239, 10, 6, 10, 22, 10, 0, 10, 128, 11, 165, 10, 6, 10, 182, + 10, 86, 10, 134, 10, 6, 10, 0, 1, 3, 6, 6, 10, 198, 51, 2, 5, 0, 60, 78, 22, 0, 30, 0, 1, + 0, 1, 25, 9, 14, 3, 0, 4, 138, 10, 30, 8, 1, 15, 32, 10, 39, 15, 0, 10, 188, 10, 0, 6, 154, + 10, 38, 10, 198, 10, 22, 10, 86, 10, 0, 10, 0, 10, 0, 45, 12, 57, 17, 2, 0, 27, 36, 4, 29, + 1, 8, 1, 134, 5, 202, 10, 0, 8, 25, 7, 39, 9, 75, 5, 22, 6, 160, 2, 2, 16, 2, 46, 64, 9, + 52, 2, 30, 3, 75, 5, 104, 8, 24, 8, 41, 7, 0, 6, 48, 10, 6, 10, 0, 31, 158, 10, 42, 4, 112, + 7, 134, 30, 128, 10, 60, 10, 144, 10, 7, 20, 251, 10, 0, 10, 118, 10, 0, 10, 102, 10, 6, + 20, 76, 12, 0, 19, 93, 10, 0, 10, 86, 29, 227, 10, 70, 10, 54, 10, 0, 10, 102, 21, 0, 111, + 0, 10, 0, 10, 86, 10, 134, 10, 1, 7, 0, 10, 0, 23, 0, 3, 0, 10, 0, 20, 12, 20, 108, 25, 0, + 50, 0, 10, 0, 10, 0, 10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, + 0, 10, 0, ]; #[inline] pub fn lookup(c: char) -> bool { @@ -509,14 +615,13 @@ pub mod n { } } -#[rustfmt::skip] pub mod uppercase { static BITSET_CHUNKS_MAP: [u8; 125] = [ 3, 14, 6, 6, 0, 6, 6, 2, 5, 12, 6, 15, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 7, 6, 13, 6, 11, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 16, 6, 6, - 6, 6, 10, 6, 4, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 6, 13, 6, 11, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 16, 6, + 6, 6, 6, 10, 6, 4, ]; static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [ [44, 44, 5, 35, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 5, 0], @@ -584,36 +689,57 @@ pub mod uppercase { 0b1111111100000000111111110000000000111111000000001111111100000000, ]; static BITSET_MAPPING: [(u8, u8); 25] = [ - (0, 182), (0, 74), (0, 166), (0, 162), (0, 159), (0, 150), (0, 148), (0, 142), (0, 134), - (0, 131), (0, 64), (1, 66), (1, 70), (1, 83), (1, 12), (1, 8), (2, 146), (2, 140), (2, 134), - (2, 130), (3, 164), (3, 146), (3, 20), (4, 178), (4, 171), + (0, 182), + (0, 74), + (0, 166), + (0, 162), + (0, 159), + (0, 150), + (0, 148), + (0, 142), + (0, 134), + (0, 131), + (0, 64), + (1, 66), + (1, 70), + (1, 83), + (1, 12), + (1, 8), + (2, 146), + (2, 140), + (2, 134), + (2, 130), + (3, 164), + (3, 146), + (3, 20), + (4, 178), + (4, 171), ]; pub const fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); - (c as u32) >= 0xc0 && - super::bitset_search( - c as u32, - &BITSET_CHUNKS_MAP, - &BITSET_INDEX_CHUNKS, - &BITSET_CANONICAL, - &BITSET_MAPPING, - ) + (c as u32) >= 0xc0 + && super::bitset_search( + c as u32, + &BITSET_CHUNKS_MAP, + &BITSET_INDEX_CHUNKS, + &BITSET_CANONICAL, + &BITSET_MAPPING, + ) } } -#[rustfmt::skip] pub mod white_space { static WHITESPACE_MAP: [u8; 256] = [ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; #[inline] pub const fn lookup(c: char) -> bool { @@ -628,7 +754,6 @@ pub mod white_space { } } -#[rustfmt::skip] pub mod conversions { #[inline] pub fn to_lower(c: char) -> [char; 3] { @@ -656,6 +781,7 @@ pub mod conversions { ) } } + #[rustfmt::skip] static LOWERCASE_TABLE: &[(char, u32); 1462] = &[ ('\u{c0}', 224), ('\u{c1}', 225), ('\u{c2}', 226), ('\u{c3}', 227), ('\u{c4}', 228), ('\u{c5}', 229), ('\u{c6}', 230), ('\u{c7}', 231), ('\u{c8}', 232), ('\u{c9}', 233), @@ -1013,6 +1139,7 @@ pub mod conversions { ('\u{1e921}', 125251), ]; + #[rustfmt::skip] static LOWERCASE_TABLE_MULTI: &[[char; 3]; 1] = &[ ['i', '\u{307}', '\u{0}'], ]; @@ -1043,6 +1170,7 @@ pub mod conversions { ) } } + #[rustfmt::skip] static UPPERCASE_TABLE: &[(char, u32); 1554] = &[ ('\u{b5}', 924), ('\u{df}', 4194304), ('\u{e0}', 192), ('\u{e1}', 193), ('\u{e2}', 194), ('\u{e3}', 195), ('\u{e4}', 196), ('\u{e5}', 197), ('\u{e6}', 198), ('\u{e7}', 199), @@ -1423,6 +1551,7 @@ pub mod conversions { ('\u{1e941}', 125215), ('\u{1e942}', 125216), ('\u{1e943}', 125217), ]; + #[rustfmt::skip] static UPPERCASE_TABLE_MULTI: &[[char; 3]; 102] = &[ ['S', 'S', '\u{0}'], ['\u{2bc}', 'N', '\u{0}'], ['J', '\u{30c}', '\u{0}'], ['\u{399}', '\u{308}', '\u{301}'], ['\u{3a5}', '\u{308}', '\u{301}'], From 21d9eb7728d7f0c97b27e5041df8ca743bba86e8 Mon Sep 17 00:00:00 2001 From: Karl Meakin Date: Sat, 11 Oct 2025 02:03:44 +0100 Subject: [PATCH 246/358] refactor: make string formatting more readable To make the final output code easier to see: * Get rid of the unnecessary line-noise of `.unwrap()`ing calls to `write!()` by moving the `.unwrap()` into a macro. * Join consecutive `write!()` calls using a single multiline format string. * Replace `.push()` and `.push_str(format!())` with `write!()`. * If after doing all of the above, there is only a single `write!()` call in the function, just construct the string directly with `format!()`. 
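A minimal sketch of the combined pattern (illustrative only: the `w!` macro
and the `emit_*` helpers below are hypothetical stand-ins, not the
generator's actual code):

    use std::fmt::Write as _;

    // Writing to a `String` is infallible, so the `.unwrap()` on every
    // `write!()` call is pure line-noise; hide it in a macro instead.
    macro_rules! w {
        ($dst:expr, $($arg:tt)*) => {
            write!($dst, $($arg)*).unwrap()
        };
    }

    fn emit_table(name: &str, len: usize) -> String {
        let mut out = String::new();
        // A single multiline format string replaces what used to be a run
        // of consecutive `write!()`/`push_str(format!())` calls.
        w!(
            out,
            "static {name}: [u8; {len}] = [\n\
             ];\n"
        );
        out
    }

    // When a function body boils down to one write, skip the buffer and
    // build the string directly with `format!()`.
    fn emit_header(version: (u8, u8, u8)) -> String {
        format!("pub const UNICODE_VERSION: (u8, u8, u8) = {version:?};\n")
    }

    fn main() {
        print!("{}", emit_header((17, 0, 0)));
        print!("{}", emit_table("OFFSETS", 767));
    }

Besides dropping the noise, joining consecutive writes makes the shape of
the emitted output visible at a glance in the generator source.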
--- core/src/unicode/unicode_data.rs | 1676 ++++++++++++++++-------------- 1 file changed, 883 insertions(+), 793 deletions(-) diff --git a/core/src/unicode/unicode_data.rs b/core/src/unicode/unicode_data.rs index 9d03529229bf1..81d0484310cf1 100644 --- a/core/src/unicode/unicode_data.rs +++ b/core/src/unicode/unicode_data.rs @@ -12,6 +12,7 @@ // Total : 31911 bytes use super::rt::*; + pub const UNICODE_VERSION: (u8, u8, u8) = (17, 0, 0); pub mod alphabetic { @@ -129,6 +130,7 @@ pub mod alphabetic { 1, 10, 1, 17, 5, 3, 1, 5, 1, 17, 0, 26, 6, 26, 6, 26, 0, 0, 32, 0, 2, 0, 2, 0, 15, 0, 0, 0, 0, 0, 5, 0, 0, ]; + #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -228,6 +230,7 @@ pub mod case_ignorable { 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160, 14, 0, 1, 61, 4, 0, 5, 254, 2, 243, 1, 2, 1, 7, 2, 5, 1, 9, 1, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0, ]; + #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -291,6 +294,7 @@ pub mod cased { 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0, ]; + #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -381,6 +385,7 @@ pub mod grapheme_extend { 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 100, 1, 160, 7, 0, 1, 61, 4, 0, 4, 254, 2, 243, 1, 2, 1, 7, 2, 5, 1, 0, 7, 109, 7, 0, 96, 128, 240, 0, ]; + #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -593,6 +598,7 @@ pub mod n { 50, 0, 10, 0, 10, 0, 10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0, ]; + #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -741,20 +747,420 @@ pub mod white_space { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; + #[inline] pub const fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); match c as u32 >> 8 { - 0 => WHITESPACE_MAP[c as usize & 0xff] & 1 != 0, - 22 => c as u32 == 0x1680, - 32 => WHITESPACE_MAP[c as usize & 0xff] & 2 != 0, - 48 => c as u32 == 0x3000, + 0x00 => WHITESPACE_MAP[c as usize & 0xff] & 1 != 0, + 0x16 => c as u32 == 0x1680, + 0x20 => WHITESPACE_MAP[c as usize & 0xff] & 2 != 0, + 0x30 => c as u32 == 0x3000, _ => false, } } } pub mod conversions { + #[rustfmt::skip] + static LOWERCASE_TABLE: &[(char, u32); 1462] = &[ + ('\u{c0}', 0xe0), ('\u{c1}', 0xe1), ('\u{c2}', 0xe2), ('\u{c3}', 0xe3), ('\u{c4}', 0xe4), + ('\u{c5}', 0xe5), ('\u{c6}', 0xe6), ('\u{c7}', 0xe7), ('\u{c8}', 0xe8), ('\u{c9}', 0xe9), + ('\u{ca}', 0xea), ('\u{cb}', 0xeb), ('\u{cc}', 0xec), ('\u{cd}', 0xed), ('\u{ce}', 0xee), + ('\u{cf}', 0xef), ('\u{d0}', 0xf0), ('\u{d1}', 0xf1), ('\u{d2}', 0xf2), ('\u{d3}', 0xf3), + ('\u{d4}', 0xf4), ('\u{d5}', 0xf5), ('\u{d6}', 0xf6), ('\u{d8}', 0xf8), ('\u{d9}', 0xf9), + ('\u{da}', 0xfa), ('\u{db}', 0xfb), ('\u{dc}', 0xfc), ('\u{dd}', 0xfd), ('\u{de}', 0xfe), + ('\u{100}', 0x101), ('\u{102}', 0x103), ('\u{104}', 0x105), ('\u{106}', 0x107), + ('\u{108}', 0x109), ('\u{10a}', 0x10b), ('\u{10c}', 0x10d), ('\u{10e}', 0x10f), + ('\u{110}', 0x111), ('\u{112}', 0x113), ('\u{114}', 0x115), ('\u{116}', 0x117), + ('\u{118}', 0x119), ('\u{11a}', 0x11b), ('\u{11c}', 0x11d), ('\u{11e}', 0x11f), + ('\u{120}', 0x121), ('\u{122}', 0x123), ('\u{124}', 0x125), ('\u{126}', 0x127), + ('\u{128}', 0x129), ('\u{12a}', 0x12b), ('\u{12c}', 0x12d), ('\u{12e}', 0x12f), + ('\u{130}', 0x400000), ('\u{132}', 0x133), ('\u{134}', 
0x135), ('\u{136}', 0x137), + ('\u{139}', 0x13a), ('\u{13b}', 0x13c), ('\u{13d}', 0x13e), ('\u{13f}', 0x140), + ('\u{141}', 0x142), ('\u{143}', 0x144), ('\u{145}', 0x146), ('\u{147}', 0x148), + ('\u{14a}', 0x14b), ('\u{14c}', 0x14d), ('\u{14e}', 0x14f), ('\u{150}', 0x151), + ('\u{152}', 0x153), ('\u{154}', 0x155), ('\u{156}', 0x157), ('\u{158}', 0x159), + ('\u{15a}', 0x15b), ('\u{15c}', 0x15d), ('\u{15e}', 0x15f), ('\u{160}', 0x161), + ('\u{162}', 0x163), ('\u{164}', 0x165), ('\u{166}', 0x167), ('\u{168}', 0x169), + ('\u{16a}', 0x16b), ('\u{16c}', 0x16d), ('\u{16e}', 0x16f), ('\u{170}', 0x171), + ('\u{172}', 0x173), ('\u{174}', 0x175), ('\u{176}', 0x177), ('\u{178}', 0xff), + ('\u{179}', 0x17a), ('\u{17b}', 0x17c), ('\u{17d}', 0x17e), ('\u{181}', 0x253), + ('\u{182}', 0x183), ('\u{184}', 0x185), ('\u{186}', 0x254), ('\u{187}', 0x188), + ('\u{189}', 0x256), ('\u{18a}', 0x257), ('\u{18b}', 0x18c), ('\u{18e}', 0x1dd), + ('\u{18f}', 0x259), ('\u{190}', 0x25b), ('\u{191}', 0x192), ('\u{193}', 0x260), + ('\u{194}', 0x263), ('\u{196}', 0x269), ('\u{197}', 0x268), ('\u{198}', 0x199), + ('\u{19c}', 0x26f), ('\u{19d}', 0x272), ('\u{19f}', 0x275), ('\u{1a0}', 0x1a1), + ('\u{1a2}', 0x1a3), ('\u{1a4}', 0x1a5), ('\u{1a6}', 0x280), ('\u{1a7}', 0x1a8), + ('\u{1a9}', 0x283), ('\u{1ac}', 0x1ad), ('\u{1ae}', 0x288), ('\u{1af}', 0x1b0), + ('\u{1b1}', 0x28a), ('\u{1b2}', 0x28b), ('\u{1b3}', 0x1b4), ('\u{1b5}', 0x1b6), + ('\u{1b7}', 0x292), ('\u{1b8}', 0x1b9), ('\u{1bc}', 0x1bd), ('\u{1c4}', 0x1c6), + ('\u{1c5}', 0x1c6), ('\u{1c7}', 0x1c9), ('\u{1c8}', 0x1c9), ('\u{1ca}', 0x1cc), + ('\u{1cb}', 0x1cc), ('\u{1cd}', 0x1ce), ('\u{1cf}', 0x1d0), ('\u{1d1}', 0x1d2), + ('\u{1d3}', 0x1d4), ('\u{1d5}', 0x1d6), ('\u{1d7}', 0x1d8), ('\u{1d9}', 0x1da), + ('\u{1db}', 0x1dc), ('\u{1de}', 0x1df), ('\u{1e0}', 0x1e1), ('\u{1e2}', 0x1e3), + ('\u{1e4}', 0x1e5), ('\u{1e6}', 0x1e7), ('\u{1e8}', 0x1e9), ('\u{1ea}', 0x1eb), + ('\u{1ec}', 0x1ed), ('\u{1ee}', 0x1ef), ('\u{1f1}', 0x1f3), ('\u{1f2}', 0x1f3), + ('\u{1f4}', 0x1f5), ('\u{1f6}', 0x195), ('\u{1f7}', 0x1bf), ('\u{1f8}', 0x1f9), + ('\u{1fa}', 0x1fb), ('\u{1fc}', 0x1fd), ('\u{1fe}', 0x1ff), ('\u{200}', 0x201), + ('\u{202}', 0x203), ('\u{204}', 0x205), ('\u{206}', 0x207), ('\u{208}', 0x209), + ('\u{20a}', 0x20b), ('\u{20c}', 0x20d), ('\u{20e}', 0x20f), ('\u{210}', 0x211), + ('\u{212}', 0x213), ('\u{214}', 0x215), ('\u{216}', 0x217), ('\u{218}', 0x219), + ('\u{21a}', 0x21b), ('\u{21c}', 0x21d), ('\u{21e}', 0x21f), ('\u{220}', 0x19e), + ('\u{222}', 0x223), ('\u{224}', 0x225), ('\u{226}', 0x227), ('\u{228}', 0x229), + ('\u{22a}', 0x22b), ('\u{22c}', 0x22d), ('\u{22e}', 0x22f), ('\u{230}', 0x231), + ('\u{232}', 0x233), ('\u{23a}', 0x2c65), ('\u{23b}', 0x23c), ('\u{23d}', 0x19a), + ('\u{23e}', 0x2c66), ('\u{241}', 0x242), ('\u{243}', 0x180), ('\u{244}', 0x289), + ('\u{245}', 0x28c), ('\u{246}', 0x247), ('\u{248}', 0x249), ('\u{24a}', 0x24b), + ('\u{24c}', 0x24d), ('\u{24e}', 0x24f), ('\u{370}', 0x371), ('\u{372}', 0x373), + ('\u{376}', 0x377), ('\u{37f}', 0x3f3), ('\u{386}', 0x3ac), ('\u{388}', 0x3ad), + ('\u{389}', 0x3ae), ('\u{38a}', 0x3af), ('\u{38c}', 0x3cc), ('\u{38e}', 0x3cd), + ('\u{38f}', 0x3ce), ('\u{391}', 0x3b1), ('\u{392}', 0x3b2), ('\u{393}', 0x3b3), + ('\u{394}', 0x3b4), ('\u{395}', 0x3b5), ('\u{396}', 0x3b6), ('\u{397}', 0x3b7), + ('\u{398}', 0x3b8), ('\u{399}', 0x3b9), ('\u{39a}', 0x3ba), ('\u{39b}', 0x3bb), + ('\u{39c}', 0x3bc), ('\u{39d}', 0x3bd), ('\u{39e}', 0x3be), ('\u{39f}', 0x3bf), + ('\u{3a0}', 0x3c0), ('\u{3a1}', 0x3c1), ('\u{3a3}', 0x3c3), ('\u{3a4}', 0x3c4), 
+ ('\u{3a5}', 0x3c5), ('\u{3a6}', 0x3c6), ('\u{3a7}', 0x3c7), ('\u{3a8}', 0x3c8), + ('\u{3a9}', 0x3c9), ('\u{3aa}', 0x3ca), ('\u{3ab}', 0x3cb), ('\u{3cf}', 0x3d7), + ('\u{3d8}', 0x3d9), ('\u{3da}', 0x3db), ('\u{3dc}', 0x3dd), ('\u{3de}', 0x3df), + ('\u{3e0}', 0x3e1), ('\u{3e2}', 0x3e3), ('\u{3e4}', 0x3e5), ('\u{3e6}', 0x3e7), + ('\u{3e8}', 0x3e9), ('\u{3ea}', 0x3eb), ('\u{3ec}', 0x3ed), ('\u{3ee}', 0x3ef), + ('\u{3f4}', 0x3b8), ('\u{3f7}', 0x3f8), ('\u{3f9}', 0x3f2), ('\u{3fa}', 0x3fb), + ('\u{3fd}', 0x37b), ('\u{3fe}', 0x37c), ('\u{3ff}', 0x37d), ('\u{400}', 0x450), + ('\u{401}', 0x451), ('\u{402}', 0x452), ('\u{403}', 0x453), ('\u{404}', 0x454), + ('\u{405}', 0x455), ('\u{406}', 0x456), ('\u{407}', 0x457), ('\u{408}', 0x458), + ('\u{409}', 0x459), ('\u{40a}', 0x45a), ('\u{40b}', 0x45b), ('\u{40c}', 0x45c), + ('\u{40d}', 0x45d), ('\u{40e}', 0x45e), ('\u{40f}', 0x45f), ('\u{410}', 0x430), + ('\u{411}', 0x431), ('\u{412}', 0x432), ('\u{413}', 0x433), ('\u{414}', 0x434), + ('\u{415}', 0x435), ('\u{416}', 0x436), ('\u{417}', 0x437), ('\u{418}', 0x438), + ('\u{419}', 0x439), ('\u{41a}', 0x43a), ('\u{41b}', 0x43b), ('\u{41c}', 0x43c), + ('\u{41d}', 0x43d), ('\u{41e}', 0x43e), ('\u{41f}', 0x43f), ('\u{420}', 0x440), + ('\u{421}', 0x441), ('\u{422}', 0x442), ('\u{423}', 0x443), ('\u{424}', 0x444), + ('\u{425}', 0x445), ('\u{426}', 0x446), ('\u{427}', 0x447), ('\u{428}', 0x448), + ('\u{429}', 0x449), ('\u{42a}', 0x44a), ('\u{42b}', 0x44b), ('\u{42c}', 0x44c), + ('\u{42d}', 0x44d), ('\u{42e}', 0x44e), ('\u{42f}', 0x44f), ('\u{460}', 0x461), + ('\u{462}', 0x463), ('\u{464}', 0x465), ('\u{466}', 0x467), ('\u{468}', 0x469), + ('\u{46a}', 0x46b), ('\u{46c}', 0x46d), ('\u{46e}', 0x46f), ('\u{470}', 0x471), + ('\u{472}', 0x473), ('\u{474}', 0x475), ('\u{476}', 0x477), ('\u{478}', 0x479), + ('\u{47a}', 0x47b), ('\u{47c}', 0x47d), ('\u{47e}', 0x47f), ('\u{480}', 0x481), + ('\u{48a}', 0x48b), ('\u{48c}', 0x48d), ('\u{48e}', 0x48f), ('\u{490}', 0x491), + ('\u{492}', 0x493), ('\u{494}', 0x495), ('\u{496}', 0x497), ('\u{498}', 0x499), + ('\u{49a}', 0x49b), ('\u{49c}', 0x49d), ('\u{49e}', 0x49f), ('\u{4a0}', 0x4a1), + ('\u{4a2}', 0x4a3), ('\u{4a4}', 0x4a5), ('\u{4a6}', 0x4a7), ('\u{4a8}', 0x4a9), + ('\u{4aa}', 0x4ab), ('\u{4ac}', 0x4ad), ('\u{4ae}', 0x4af), ('\u{4b0}', 0x4b1), + ('\u{4b2}', 0x4b3), ('\u{4b4}', 0x4b5), ('\u{4b6}', 0x4b7), ('\u{4b8}', 0x4b9), + ('\u{4ba}', 0x4bb), ('\u{4bc}', 0x4bd), ('\u{4be}', 0x4bf), ('\u{4c0}', 0x4cf), + ('\u{4c1}', 0x4c2), ('\u{4c3}', 0x4c4), ('\u{4c5}', 0x4c6), ('\u{4c7}', 0x4c8), + ('\u{4c9}', 0x4ca), ('\u{4cb}', 0x4cc), ('\u{4cd}', 0x4ce), ('\u{4d0}', 0x4d1), + ('\u{4d2}', 0x4d3), ('\u{4d4}', 0x4d5), ('\u{4d6}', 0x4d7), ('\u{4d8}', 0x4d9), + ('\u{4da}', 0x4db), ('\u{4dc}', 0x4dd), ('\u{4de}', 0x4df), ('\u{4e0}', 0x4e1), + ('\u{4e2}', 0x4e3), ('\u{4e4}', 0x4e5), ('\u{4e6}', 0x4e7), ('\u{4e8}', 0x4e9), + ('\u{4ea}', 0x4eb), ('\u{4ec}', 0x4ed), ('\u{4ee}', 0x4ef), ('\u{4f0}', 0x4f1), + ('\u{4f2}', 0x4f3), ('\u{4f4}', 0x4f5), ('\u{4f6}', 0x4f7), ('\u{4f8}', 0x4f9), + ('\u{4fa}', 0x4fb), ('\u{4fc}', 0x4fd), ('\u{4fe}', 0x4ff), ('\u{500}', 0x501), + ('\u{502}', 0x503), ('\u{504}', 0x505), ('\u{506}', 0x507), ('\u{508}', 0x509), + ('\u{50a}', 0x50b), ('\u{50c}', 0x50d), ('\u{50e}', 0x50f), ('\u{510}', 0x511), + ('\u{512}', 0x513), ('\u{514}', 0x515), ('\u{516}', 0x517), ('\u{518}', 0x519), + ('\u{51a}', 0x51b), ('\u{51c}', 0x51d), ('\u{51e}', 0x51f), ('\u{520}', 0x521), + ('\u{522}', 0x523), ('\u{524}', 0x525), ('\u{526}', 0x527), ('\u{528}', 0x529), + ('\u{52a}', 0x52b), 
('\u{52c}', 0x52d), ('\u{52e}', 0x52f), ('\u{531}', 0x561), + ('\u{532}', 0x562), ('\u{533}', 0x563), ('\u{534}', 0x564), ('\u{535}', 0x565), + ('\u{536}', 0x566), ('\u{537}', 0x567), ('\u{538}', 0x568), ('\u{539}', 0x569), + ('\u{53a}', 0x56a), ('\u{53b}', 0x56b), ('\u{53c}', 0x56c), ('\u{53d}', 0x56d), + ('\u{53e}', 0x56e), ('\u{53f}', 0x56f), ('\u{540}', 0x570), ('\u{541}', 0x571), + ('\u{542}', 0x572), ('\u{543}', 0x573), ('\u{544}', 0x574), ('\u{545}', 0x575), + ('\u{546}', 0x576), ('\u{547}', 0x577), ('\u{548}', 0x578), ('\u{549}', 0x579), + ('\u{54a}', 0x57a), ('\u{54b}', 0x57b), ('\u{54c}', 0x57c), ('\u{54d}', 0x57d), + ('\u{54e}', 0x57e), ('\u{54f}', 0x57f), ('\u{550}', 0x580), ('\u{551}', 0x581), + ('\u{552}', 0x582), ('\u{553}', 0x583), ('\u{554}', 0x584), ('\u{555}', 0x585), + ('\u{556}', 0x586), ('\u{10a0}', 0x2d00), ('\u{10a1}', 0x2d01), ('\u{10a2}', 0x2d02), + ('\u{10a3}', 0x2d03), ('\u{10a4}', 0x2d04), ('\u{10a5}', 0x2d05), ('\u{10a6}', 0x2d06), + ('\u{10a7}', 0x2d07), ('\u{10a8}', 0x2d08), ('\u{10a9}', 0x2d09), ('\u{10aa}', 0x2d0a), + ('\u{10ab}', 0x2d0b), ('\u{10ac}', 0x2d0c), ('\u{10ad}', 0x2d0d), ('\u{10ae}', 0x2d0e), + ('\u{10af}', 0x2d0f), ('\u{10b0}', 0x2d10), ('\u{10b1}', 0x2d11), ('\u{10b2}', 0x2d12), + ('\u{10b3}', 0x2d13), ('\u{10b4}', 0x2d14), ('\u{10b5}', 0x2d15), ('\u{10b6}', 0x2d16), + ('\u{10b7}', 0x2d17), ('\u{10b8}', 0x2d18), ('\u{10b9}', 0x2d19), ('\u{10ba}', 0x2d1a), + ('\u{10bb}', 0x2d1b), ('\u{10bc}', 0x2d1c), ('\u{10bd}', 0x2d1d), ('\u{10be}', 0x2d1e), + ('\u{10bf}', 0x2d1f), ('\u{10c0}', 0x2d20), ('\u{10c1}', 0x2d21), ('\u{10c2}', 0x2d22), + ('\u{10c3}', 0x2d23), ('\u{10c4}', 0x2d24), ('\u{10c5}', 0x2d25), ('\u{10c7}', 0x2d27), + ('\u{10cd}', 0x2d2d), ('\u{13a0}', 0xab70), ('\u{13a1}', 0xab71), ('\u{13a2}', 0xab72), + ('\u{13a3}', 0xab73), ('\u{13a4}', 0xab74), ('\u{13a5}', 0xab75), ('\u{13a6}', 0xab76), + ('\u{13a7}', 0xab77), ('\u{13a8}', 0xab78), ('\u{13a9}', 0xab79), ('\u{13aa}', 0xab7a), + ('\u{13ab}', 0xab7b), ('\u{13ac}', 0xab7c), ('\u{13ad}', 0xab7d), ('\u{13ae}', 0xab7e), + ('\u{13af}', 0xab7f), ('\u{13b0}', 0xab80), ('\u{13b1}', 0xab81), ('\u{13b2}', 0xab82), + ('\u{13b3}', 0xab83), ('\u{13b4}', 0xab84), ('\u{13b5}', 0xab85), ('\u{13b6}', 0xab86), + ('\u{13b7}', 0xab87), ('\u{13b8}', 0xab88), ('\u{13b9}', 0xab89), ('\u{13ba}', 0xab8a), + ('\u{13bb}', 0xab8b), ('\u{13bc}', 0xab8c), ('\u{13bd}', 0xab8d), ('\u{13be}', 0xab8e), + ('\u{13bf}', 0xab8f), ('\u{13c0}', 0xab90), ('\u{13c1}', 0xab91), ('\u{13c2}', 0xab92), + ('\u{13c3}', 0xab93), ('\u{13c4}', 0xab94), ('\u{13c5}', 0xab95), ('\u{13c6}', 0xab96), + ('\u{13c7}', 0xab97), ('\u{13c8}', 0xab98), ('\u{13c9}', 0xab99), ('\u{13ca}', 0xab9a), + ('\u{13cb}', 0xab9b), ('\u{13cc}', 0xab9c), ('\u{13cd}', 0xab9d), ('\u{13ce}', 0xab9e), + ('\u{13cf}', 0xab9f), ('\u{13d0}', 0xaba0), ('\u{13d1}', 0xaba1), ('\u{13d2}', 0xaba2), + ('\u{13d3}', 0xaba3), ('\u{13d4}', 0xaba4), ('\u{13d5}', 0xaba5), ('\u{13d6}', 0xaba6), + ('\u{13d7}', 0xaba7), ('\u{13d8}', 0xaba8), ('\u{13d9}', 0xaba9), ('\u{13da}', 0xabaa), + ('\u{13db}', 0xabab), ('\u{13dc}', 0xabac), ('\u{13dd}', 0xabad), ('\u{13de}', 0xabae), + ('\u{13df}', 0xabaf), ('\u{13e0}', 0xabb0), ('\u{13e1}', 0xabb1), ('\u{13e2}', 0xabb2), + ('\u{13e3}', 0xabb3), ('\u{13e4}', 0xabb4), ('\u{13e5}', 0xabb5), ('\u{13e6}', 0xabb6), + ('\u{13e7}', 0xabb7), ('\u{13e8}', 0xabb8), ('\u{13e9}', 0xabb9), ('\u{13ea}', 0xabba), + ('\u{13eb}', 0xabbb), ('\u{13ec}', 0xabbc), ('\u{13ed}', 0xabbd), ('\u{13ee}', 0xabbe), + ('\u{13ef}', 0xabbf), ('\u{13f0}', 0x13f8), ('\u{13f1}', 
0x13f9), ('\u{13f2}', 0x13fa), + ('\u{13f3}', 0x13fb), ('\u{13f4}', 0x13fc), ('\u{13f5}', 0x13fd), ('\u{1c89}', 0x1c8a), + ('\u{1c90}', 0x10d0), ('\u{1c91}', 0x10d1), ('\u{1c92}', 0x10d2), ('\u{1c93}', 0x10d3), + ('\u{1c94}', 0x10d4), ('\u{1c95}', 0x10d5), ('\u{1c96}', 0x10d6), ('\u{1c97}', 0x10d7), + ('\u{1c98}', 0x10d8), ('\u{1c99}', 0x10d9), ('\u{1c9a}', 0x10da), ('\u{1c9b}', 0x10db), + ('\u{1c9c}', 0x10dc), ('\u{1c9d}', 0x10dd), ('\u{1c9e}', 0x10de), ('\u{1c9f}', 0x10df), + ('\u{1ca0}', 0x10e0), ('\u{1ca1}', 0x10e1), ('\u{1ca2}', 0x10e2), ('\u{1ca3}', 0x10e3), + ('\u{1ca4}', 0x10e4), ('\u{1ca5}', 0x10e5), ('\u{1ca6}', 0x10e6), ('\u{1ca7}', 0x10e7), + ('\u{1ca8}', 0x10e8), ('\u{1ca9}', 0x10e9), ('\u{1caa}', 0x10ea), ('\u{1cab}', 0x10eb), + ('\u{1cac}', 0x10ec), ('\u{1cad}', 0x10ed), ('\u{1cae}', 0x10ee), ('\u{1caf}', 0x10ef), + ('\u{1cb0}', 0x10f0), ('\u{1cb1}', 0x10f1), ('\u{1cb2}', 0x10f2), ('\u{1cb3}', 0x10f3), + ('\u{1cb4}', 0x10f4), ('\u{1cb5}', 0x10f5), ('\u{1cb6}', 0x10f6), ('\u{1cb7}', 0x10f7), + ('\u{1cb8}', 0x10f8), ('\u{1cb9}', 0x10f9), ('\u{1cba}', 0x10fa), ('\u{1cbd}', 0x10fd), + ('\u{1cbe}', 0x10fe), ('\u{1cbf}', 0x10ff), ('\u{1e00}', 0x1e01), ('\u{1e02}', 0x1e03), + ('\u{1e04}', 0x1e05), ('\u{1e06}', 0x1e07), ('\u{1e08}', 0x1e09), ('\u{1e0a}', 0x1e0b), + ('\u{1e0c}', 0x1e0d), ('\u{1e0e}', 0x1e0f), ('\u{1e10}', 0x1e11), ('\u{1e12}', 0x1e13), + ('\u{1e14}', 0x1e15), ('\u{1e16}', 0x1e17), ('\u{1e18}', 0x1e19), ('\u{1e1a}', 0x1e1b), + ('\u{1e1c}', 0x1e1d), ('\u{1e1e}', 0x1e1f), ('\u{1e20}', 0x1e21), ('\u{1e22}', 0x1e23), + ('\u{1e24}', 0x1e25), ('\u{1e26}', 0x1e27), ('\u{1e28}', 0x1e29), ('\u{1e2a}', 0x1e2b), + ('\u{1e2c}', 0x1e2d), ('\u{1e2e}', 0x1e2f), ('\u{1e30}', 0x1e31), ('\u{1e32}', 0x1e33), + ('\u{1e34}', 0x1e35), ('\u{1e36}', 0x1e37), ('\u{1e38}', 0x1e39), ('\u{1e3a}', 0x1e3b), + ('\u{1e3c}', 0x1e3d), ('\u{1e3e}', 0x1e3f), ('\u{1e40}', 0x1e41), ('\u{1e42}', 0x1e43), + ('\u{1e44}', 0x1e45), ('\u{1e46}', 0x1e47), ('\u{1e48}', 0x1e49), ('\u{1e4a}', 0x1e4b), + ('\u{1e4c}', 0x1e4d), ('\u{1e4e}', 0x1e4f), ('\u{1e50}', 0x1e51), ('\u{1e52}', 0x1e53), + ('\u{1e54}', 0x1e55), ('\u{1e56}', 0x1e57), ('\u{1e58}', 0x1e59), ('\u{1e5a}', 0x1e5b), + ('\u{1e5c}', 0x1e5d), ('\u{1e5e}', 0x1e5f), ('\u{1e60}', 0x1e61), ('\u{1e62}', 0x1e63), + ('\u{1e64}', 0x1e65), ('\u{1e66}', 0x1e67), ('\u{1e68}', 0x1e69), ('\u{1e6a}', 0x1e6b), + ('\u{1e6c}', 0x1e6d), ('\u{1e6e}', 0x1e6f), ('\u{1e70}', 0x1e71), ('\u{1e72}', 0x1e73), + ('\u{1e74}', 0x1e75), ('\u{1e76}', 0x1e77), ('\u{1e78}', 0x1e79), ('\u{1e7a}', 0x1e7b), + ('\u{1e7c}', 0x1e7d), ('\u{1e7e}', 0x1e7f), ('\u{1e80}', 0x1e81), ('\u{1e82}', 0x1e83), + ('\u{1e84}', 0x1e85), ('\u{1e86}', 0x1e87), ('\u{1e88}', 0x1e89), ('\u{1e8a}', 0x1e8b), + ('\u{1e8c}', 0x1e8d), ('\u{1e8e}', 0x1e8f), ('\u{1e90}', 0x1e91), ('\u{1e92}', 0x1e93), + ('\u{1e94}', 0x1e95), ('\u{1e9e}', 0xdf), ('\u{1ea0}', 0x1ea1), ('\u{1ea2}', 0x1ea3), + ('\u{1ea4}', 0x1ea5), ('\u{1ea6}', 0x1ea7), ('\u{1ea8}', 0x1ea9), ('\u{1eaa}', 0x1eab), + ('\u{1eac}', 0x1ead), ('\u{1eae}', 0x1eaf), ('\u{1eb0}', 0x1eb1), ('\u{1eb2}', 0x1eb3), + ('\u{1eb4}', 0x1eb5), ('\u{1eb6}', 0x1eb7), ('\u{1eb8}', 0x1eb9), ('\u{1eba}', 0x1ebb), + ('\u{1ebc}', 0x1ebd), ('\u{1ebe}', 0x1ebf), ('\u{1ec0}', 0x1ec1), ('\u{1ec2}', 0x1ec3), + ('\u{1ec4}', 0x1ec5), ('\u{1ec6}', 0x1ec7), ('\u{1ec8}', 0x1ec9), ('\u{1eca}', 0x1ecb), + ('\u{1ecc}', 0x1ecd), ('\u{1ece}', 0x1ecf), ('\u{1ed0}', 0x1ed1), ('\u{1ed2}', 0x1ed3), + ('\u{1ed4}', 0x1ed5), ('\u{1ed6}', 0x1ed7), ('\u{1ed8}', 0x1ed9), ('\u{1eda}', 0x1edb), + ('\u{1edc}', 
0x1edd), ('\u{1ede}', 0x1edf), ('\u{1ee0}', 0x1ee1), ('\u{1ee2}', 0x1ee3), + ('\u{1ee4}', 0x1ee5), ('\u{1ee6}', 0x1ee7), ('\u{1ee8}', 0x1ee9), ('\u{1eea}', 0x1eeb), + ('\u{1eec}', 0x1eed), ('\u{1eee}', 0x1eef), ('\u{1ef0}', 0x1ef1), ('\u{1ef2}', 0x1ef3), + ('\u{1ef4}', 0x1ef5), ('\u{1ef6}', 0x1ef7), ('\u{1ef8}', 0x1ef9), ('\u{1efa}', 0x1efb), + ('\u{1efc}', 0x1efd), ('\u{1efe}', 0x1eff), ('\u{1f08}', 0x1f00), ('\u{1f09}', 0x1f01), + ('\u{1f0a}', 0x1f02), ('\u{1f0b}', 0x1f03), ('\u{1f0c}', 0x1f04), ('\u{1f0d}', 0x1f05), + ('\u{1f0e}', 0x1f06), ('\u{1f0f}', 0x1f07), ('\u{1f18}', 0x1f10), ('\u{1f19}', 0x1f11), + ('\u{1f1a}', 0x1f12), ('\u{1f1b}', 0x1f13), ('\u{1f1c}', 0x1f14), ('\u{1f1d}', 0x1f15), + ('\u{1f28}', 0x1f20), ('\u{1f29}', 0x1f21), ('\u{1f2a}', 0x1f22), ('\u{1f2b}', 0x1f23), + ('\u{1f2c}', 0x1f24), ('\u{1f2d}', 0x1f25), ('\u{1f2e}', 0x1f26), ('\u{1f2f}', 0x1f27), + ('\u{1f38}', 0x1f30), ('\u{1f39}', 0x1f31), ('\u{1f3a}', 0x1f32), ('\u{1f3b}', 0x1f33), + ('\u{1f3c}', 0x1f34), ('\u{1f3d}', 0x1f35), ('\u{1f3e}', 0x1f36), ('\u{1f3f}', 0x1f37), + ('\u{1f48}', 0x1f40), ('\u{1f49}', 0x1f41), ('\u{1f4a}', 0x1f42), ('\u{1f4b}', 0x1f43), + ('\u{1f4c}', 0x1f44), ('\u{1f4d}', 0x1f45), ('\u{1f59}', 0x1f51), ('\u{1f5b}', 0x1f53), + ('\u{1f5d}', 0x1f55), ('\u{1f5f}', 0x1f57), ('\u{1f68}', 0x1f60), ('\u{1f69}', 0x1f61), + ('\u{1f6a}', 0x1f62), ('\u{1f6b}', 0x1f63), ('\u{1f6c}', 0x1f64), ('\u{1f6d}', 0x1f65), + ('\u{1f6e}', 0x1f66), ('\u{1f6f}', 0x1f67), ('\u{1f88}', 0x1f80), ('\u{1f89}', 0x1f81), + ('\u{1f8a}', 0x1f82), ('\u{1f8b}', 0x1f83), ('\u{1f8c}', 0x1f84), ('\u{1f8d}', 0x1f85), + ('\u{1f8e}', 0x1f86), ('\u{1f8f}', 0x1f87), ('\u{1f98}', 0x1f90), ('\u{1f99}', 0x1f91), + ('\u{1f9a}', 0x1f92), ('\u{1f9b}', 0x1f93), ('\u{1f9c}', 0x1f94), ('\u{1f9d}', 0x1f95), + ('\u{1f9e}', 0x1f96), ('\u{1f9f}', 0x1f97), ('\u{1fa8}', 0x1fa0), ('\u{1fa9}', 0x1fa1), + ('\u{1faa}', 0x1fa2), ('\u{1fab}', 0x1fa3), ('\u{1fac}', 0x1fa4), ('\u{1fad}', 0x1fa5), + ('\u{1fae}', 0x1fa6), ('\u{1faf}', 0x1fa7), ('\u{1fb8}', 0x1fb0), ('\u{1fb9}', 0x1fb1), + ('\u{1fba}', 0x1f70), ('\u{1fbb}', 0x1f71), ('\u{1fbc}', 0x1fb3), ('\u{1fc8}', 0x1f72), + ('\u{1fc9}', 0x1f73), ('\u{1fca}', 0x1f74), ('\u{1fcb}', 0x1f75), ('\u{1fcc}', 0x1fc3), + ('\u{1fd8}', 0x1fd0), ('\u{1fd9}', 0x1fd1), ('\u{1fda}', 0x1f76), ('\u{1fdb}', 0x1f77), + ('\u{1fe8}', 0x1fe0), ('\u{1fe9}', 0x1fe1), ('\u{1fea}', 0x1f7a), ('\u{1feb}', 0x1f7b), + ('\u{1fec}', 0x1fe5), ('\u{1ff8}', 0x1f78), ('\u{1ff9}', 0x1f79), ('\u{1ffa}', 0x1f7c), + ('\u{1ffb}', 0x1f7d), ('\u{1ffc}', 0x1ff3), ('\u{2126}', 0x3c9), ('\u{212a}', 0x6b), + ('\u{212b}', 0xe5), ('\u{2132}', 0x214e), ('\u{2160}', 0x2170), ('\u{2161}', 0x2171), + ('\u{2162}', 0x2172), ('\u{2163}', 0x2173), ('\u{2164}', 0x2174), ('\u{2165}', 0x2175), + ('\u{2166}', 0x2176), ('\u{2167}', 0x2177), ('\u{2168}', 0x2178), ('\u{2169}', 0x2179), + ('\u{216a}', 0x217a), ('\u{216b}', 0x217b), ('\u{216c}', 0x217c), ('\u{216d}', 0x217d), + ('\u{216e}', 0x217e), ('\u{216f}', 0x217f), ('\u{2183}', 0x2184), ('\u{24b6}', 0x24d0), + ('\u{24b7}', 0x24d1), ('\u{24b8}', 0x24d2), ('\u{24b9}', 0x24d3), ('\u{24ba}', 0x24d4), + ('\u{24bb}', 0x24d5), ('\u{24bc}', 0x24d6), ('\u{24bd}', 0x24d7), ('\u{24be}', 0x24d8), + ('\u{24bf}', 0x24d9), ('\u{24c0}', 0x24da), ('\u{24c1}', 0x24db), ('\u{24c2}', 0x24dc), + ('\u{24c3}', 0x24dd), ('\u{24c4}', 0x24de), ('\u{24c5}', 0x24df), ('\u{24c6}', 0x24e0), + ('\u{24c7}', 0x24e1), ('\u{24c8}', 0x24e2), ('\u{24c9}', 0x24e3), ('\u{24ca}', 0x24e4), + ('\u{24cb}', 0x24e5), ('\u{24cc}', 0x24e6), ('\u{24cd}', 
0x24e7), ('\u{24ce}', 0x24e8), + ('\u{24cf}', 0x24e9), ('\u{2c00}', 0x2c30), ('\u{2c01}', 0x2c31), ('\u{2c02}', 0x2c32), + ('\u{2c03}', 0x2c33), ('\u{2c04}', 0x2c34), ('\u{2c05}', 0x2c35), ('\u{2c06}', 0x2c36), + ('\u{2c07}', 0x2c37), ('\u{2c08}', 0x2c38), ('\u{2c09}', 0x2c39), ('\u{2c0a}', 0x2c3a), + ('\u{2c0b}', 0x2c3b), ('\u{2c0c}', 0x2c3c), ('\u{2c0d}', 0x2c3d), ('\u{2c0e}', 0x2c3e), + ('\u{2c0f}', 0x2c3f), ('\u{2c10}', 0x2c40), ('\u{2c11}', 0x2c41), ('\u{2c12}', 0x2c42), + ('\u{2c13}', 0x2c43), ('\u{2c14}', 0x2c44), ('\u{2c15}', 0x2c45), ('\u{2c16}', 0x2c46), + ('\u{2c17}', 0x2c47), ('\u{2c18}', 0x2c48), ('\u{2c19}', 0x2c49), ('\u{2c1a}', 0x2c4a), + ('\u{2c1b}', 0x2c4b), ('\u{2c1c}', 0x2c4c), ('\u{2c1d}', 0x2c4d), ('\u{2c1e}', 0x2c4e), + ('\u{2c1f}', 0x2c4f), ('\u{2c20}', 0x2c50), ('\u{2c21}', 0x2c51), ('\u{2c22}', 0x2c52), + ('\u{2c23}', 0x2c53), ('\u{2c24}', 0x2c54), ('\u{2c25}', 0x2c55), ('\u{2c26}', 0x2c56), + ('\u{2c27}', 0x2c57), ('\u{2c28}', 0x2c58), ('\u{2c29}', 0x2c59), ('\u{2c2a}', 0x2c5a), + ('\u{2c2b}', 0x2c5b), ('\u{2c2c}', 0x2c5c), ('\u{2c2d}', 0x2c5d), ('\u{2c2e}', 0x2c5e), + ('\u{2c2f}', 0x2c5f), ('\u{2c60}', 0x2c61), ('\u{2c62}', 0x26b), ('\u{2c63}', 0x1d7d), + ('\u{2c64}', 0x27d), ('\u{2c67}', 0x2c68), ('\u{2c69}', 0x2c6a), ('\u{2c6b}', 0x2c6c), + ('\u{2c6d}', 0x251), ('\u{2c6e}', 0x271), ('\u{2c6f}', 0x250), ('\u{2c70}', 0x252), + ('\u{2c72}', 0x2c73), ('\u{2c75}', 0x2c76), ('\u{2c7e}', 0x23f), ('\u{2c7f}', 0x240), + ('\u{2c80}', 0x2c81), ('\u{2c82}', 0x2c83), ('\u{2c84}', 0x2c85), ('\u{2c86}', 0x2c87), + ('\u{2c88}', 0x2c89), ('\u{2c8a}', 0x2c8b), ('\u{2c8c}', 0x2c8d), ('\u{2c8e}', 0x2c8f), + ('\u{2c90}', 0x2c91), ('\u{2c92}', 0x2c93), ('\u{2c94}', 0x2c95), ('\u{2c96}', 0x2c97), + ('\u{2c98}', 0x2c99), ('\u{2c9a}', 0x2c9b), ('\u{2c9c}', 0x2c9d), ('\u{2c9e}', 0x2c9f), + ('\u{2ca0}', 0x2ca1), ('\u{2ca2}', 0x2ca3), ('\u{2ca4}', 0x2ca5), ('\u{2ca6}', 0x2ca7), + ('\u{2ca8}', 0x2ca9), ('\u{2caa}', 0x2cab), ('\u{2cac}', 0x2cad), ('\u{2cae}', 0x2caf), + ('\u{2cb0}', 0x2cb1), ('\u{2cb2}', 0x2cb3), ('\u{2cb4}', 0x2cb5), ('\u{2cb6}', 0x2cb7), + ('\u{2cb8}', 0x2cb9), ('\u{2cba}', 0x2cbb), ('\u{2cbc}', 0x2cbd), ('\u{2cbe}', 0x2cbf), + ('\u{2cc0}', 0x2cc1), ('\u{2cc2}', 0x2cc3), ('\u{2cc4}', 0x2cc5), ('\u{2cc6}', 0x2cc7), + ('\u{2cc8}', 0x2cc9), ('\u{2cca}', 0x2ccb), ('\u{2ccc}', 0x2ccd), ('\u{2cce}', 0x2ccf), + ('\u{2cd0}', 0x2cd1), ('\u{2cd2}', 0x2cd3), ('\u{2cd4}', 0x2cd5), ('\u{2cd6}', 0x2cd7), + ('\u{2cd8}', 0x2cd9), ('\u{2cda}', 0x2cdb), ('\u{2cdc}', 0x2cdd), ('\u{2cde}', 0x2cdf), + ('\u{2ce0}', 0x2ce1), ('\u{2ce2}', 0x2ce3), ('\u{2ceb}', 0x2cec), ('\u{2ced}', 0x2cee), + ('\u{2cf2}', 0x2cf3), ('\u{a640}', 0xa641), ('\u{a642}', 0xa643), ('\u{a644}', 0xa645), + ('\u{a646}', 0xa647), ('\u{a648}', 0xa649), ('\u{a64a}', 0xa64b), ('\u{a64c}', 0xa64d), + ('\u{a64e}', 0xa64f), ('\u{a650}', 0xa651), ('\u{a652}', 0xa653), ('\u{a654}', 0xa655), + ('\u{a656}', 0xa657), ('\u{a658}', 0xa659), ('\u{a65a}', 0xa65b), ('\u{a65c}', 0xa65d), + ('\u{a65e}', 0xa65f), ('\u{a660}', 0xa661), ('\u{a662}', 0xa663), ('\u{a664}', 0xa665), + ('\u{a666}', 0xa667), ('\u{a668}', 0xa669), ('\u{a66a}', 0xa66b), ('\u{a66c}', 0xa66d), + ('\u{a680}', 0xa681), ('\u{a682}', 0xa683), ('\u{a684}', 0xa685), ('\u{a686}', 0xa687), + ('\u{a688}', 0xa689), ('\u{a68a}', 0xa68b), ('\u{a68c}', 0xa68d), ('\u{a68e}', 0xa68f), + ('\u{a690}', 0xa691), ('\u{a692}', 0xa693), ('\u{a694}', 0xa695), ('\u{a696}', 0xa697), + ('\u{a698}', 0xa699), ('\u{a69a}', 0xa69b), ('\u{a722}', 0xa723), ('\u{a724}', 0xa725), + ('\u{a726}', 
0xa727), ('\u{a728}', 0xa729), ('\u{a72a}', 0xa72b), ('\u{a72c}', 0xa72d), + ('\u{a72e}', 0xa72f), ('\u{a732}', 0xa733), ('\u{a734}', 0xa735), ('\u{a736}', 0xa737), + ('\u{a738}', 0xa739), ('\u{a73a}', 0xa73b), ('\u{a73c}', 0xa73d), ('\u{a73e}', 0xa73f), + ('\u{a740}', 0xa741), ('\u{a742}', 0xa743), ('\u{a744}', 0xa745), ('\u{a746}', 0xa747), + ('\u{a748}', 0xa749), ('\u{a74a}', 0xa74b), ('\u{a74c}', 0xa74d), ('\u{a74e}', 0xa74f), + ('\u{a750}', 0xa751), ('\u{a752}', 0xa753), ('\u{a754}', 0xa755), ('\u{a756}', 0xa757), + ('\u{a758}', 0xa759), ('\u{a75a}', 0xa75b), ('\u{a75c}', 0xa75d), ('\u{a75e}', 0xa75f), + ('\u{a760}', 0xa761), ('\u{a762}', 0xa763), ('\u{a764}', 0xa765), ('\u{a766}', 0xa767), + ('\u{a768}', 0xa769), ('\u{a76a}', 0xa76b), ('\u{a76c}', 0xa76d), ('\u{a76e}', 0xa76f), + ('\u{a779}', 0xa77a), ('\u{a77b}', 0xa77c), ('\u{a77d}', 0x1d79), ('\u{a77e}', 0xa77f), + ('\u{a780}', 0xa781), ('\u{a782}', 0xa783), ('\u{a784}', 0xa785), ('\u{a786}', 0xa787), + ('\u{a78b}', 0xa78c), ('\u{a78d}', 0x265), ('\u{a790}', 0xa791), ('\u{a792}', 0xa793), + ('\u{a796}', 0xa797), ('\u{a798}', 0xa799), ('\u{a79a}', 0xa79b), ('\u{a79c}', 0xa79d), + ('\u{a79e}', 0xa79f), ('\u{a7a0}', 0xa7a1), ('\u{a7a2}', 0xa7a3), ('\u{a7a4}', 0xa7a5), + ('\u{a7a6}', 0xa7a7), ('\u{a7a8}', 0xa7a9), ('\u{a7aa}', 0x266), ('\u{a7ab}', 0x25c), + ('\u{a7ac}', 0x261), ('\u{a7ad}', 0x26c), ('\u{a7ae}', 0x26a), ('\u{a7b0}', 0x29e), + ('\u{a7b1}', 0x287), ('\u{a7b2}', 0x29d), ('\u{a7b3}', 0xab53), ('\u{a7b4}', 0xa7b5), + ('\u{a7b6}', 0xa7b7), ('\u{a7b8}', 0xa7b9), ('\u{a7ba}', 0xa7bb), ('\u{a7bc}', 0xa7bd), + ('\u{a7be}', 0xa7bf), ('\u{a7c0}', 0xa7c1), ('\u{a7c2}', 0xa7c3), ('\u{a7c4}', 0xa794), + ('\u{a7c5}', 0x282), ('\u{a7c6}', 0x1d8e), ('\u{a7c7}', 0xa7c8), ('\u{a7c9}', 0xa7ca), + ('\u{a7cb}', 0x264), ('\u{a7cc}', 0xa7cd), ('\u{a7ce}', 0xa7cf), ('\u{a7d0}', 0xa7d1), + ('\u{a7d2}', 0xa7d3), ('\u{a7d4}', 0xa7d5), ('\u{a7d6}', 0xa7d7), ('\u{a7d8}', 0xa7d9), + ('\u{a7da}', 0xa7db), ('\u{a7dc}', 0x19b), ('\u{a7f5}', 0xa7f6), ('\u{ff21}', 0xff41), + ('\u{ff22}', 0xff42), ('\u{ff23}', 0xff43), ('\u{ff24}', 0xff44), ('\u{ff25}', 0xff45), + ('\u{ff26}', 0xff46), ('\u{ff27}', 0xff47), ('\u{ff28}', 0xff48), ('\u{ff29}', 0xff49), + ('\u{ff2a}', 0xff4a), ('\u{ff2b}', 0xff4b), ('\u{ff2c}', 0xff4c), ('\u{ff2d}', 0xff4d), + ('\u{ff2e}', 0xff4e), ('\u{ff2f}', 0xff4f), ('\u{ff30}', 0xff50), ('\u{ff31}', 0xff51), + ('\u{ff32}', 0xff52), ('\u{ff33}', 0xff53), ('\u{ff34}', 0xff54), ('\u{ff35}', 0xff55), + ('\u{ff36}', 0xff56), ('\u{ff37}', 0xff57), ('\u{ff38}', 0xff58), ('\u{ff39}', 0xff59), + ('\u{ff3a}', 0xff5a), ('\u{10400}', 0x10428), ('\u{10401}', 0x10429), + ('\u{10402}', 0x1042a), ('\u{10403}', 0x1042b), ('\u{10404}', 0x1042c), + ('\u{10405}', 0x1042d), ('\u{10406}', 0x1042e), ('\u{10407}', 0x1042f), + ('\u{10408}', 0x10430), ('\u{10409}', 0x10431), ('\u{1040a}', 0x10432), + ('\u{1040b}', 0x10433), ('\u{1040c}', 0x10434), ('\u{1040d}', 0x10435), + ('\u{1040e}', 0x10436), ('\u{1040f}', 0x10437), ('\u{10410}', 0x10438), + ('\u{10411}', 0x10439), ('\u{10412}', 0x1043a), ('\u{10413}', 0x1043b), + ('\u{10414}', 0x1043c), ('\u{10415}', 0x1043d), ('\u{10416}', 0x1043e), + ('\u{10417}', 0x1043f), ('\u{10418}', 0x10440), ('\u{10419}', 0x10441), + ('\u{1041a}', 0x10442), ('\u{1041b}', 0x10443), ('\u{1041c}', 0x10444), + ('\u{1041d}', 0x10445), ('\u{1041e}', 0x10446), ('\u{1041f}', 0x10447), + ('\u{10420}', 0x10448), ('\u{10421}', 0x10449), ('\u{10422}', 0x1044a), + ('\u{10423}', 0x1044b), ('\u{10424}', 0x1044c), ('\u{10425}', 0x1044d), + 
('\u{10426}', 0x1044e), ('\u{10427}', 0x1044f), ('\u{104b0}', 0x104d8), + ('\u{104b1}', 0x104d9), ('\u{104b2}', 0x104da), ('\u{104b3}', 0x104db), + ('\u{104b4}', 0x104dc), ('\u{104b5}', 0x104dd), ('\u{104b6}', 0x104de), + ('\u{104b7}', 0x104df), ('\u{104b8}', 0x104e0), ('\u{104b9}', 0x104e1), + ('\u{104ba}', 0x104e2), ('\u{104bb}', 0x104e3), ('\u{104bc}', 0x104e4), + ('\u{104bd}', 0x104e5), ('\u{104be}', 0x104e6), ('\u{104bf}', 0x104e7), + ('\u{104c0}', 0x104e8), ('\u{104c1}', 0x104e9), ('\u{104c2}', 0x104ea), + ('\u{104c3}', 0x104eb), ('\u{104c4}', 0x104ec), ('\u{104c5}', 0x104ed), + ('\u{104c6}', 0x104ee), ('\u{104c7}', 0x104ef), ('\u{104c8}', 0x104f0), + ('\u{104c9}', 0x104f1), ('\u{104ca}', 0x104f2), ('\u{104cb}', 0x104f3), + ('\u{104cc}', 0x104f4), ('\u{104cd}', 0x104f5), ('\u{104ce}', 0x104f6), + ('\u{104cf}', 0x104f7), ('\u{104d0}', 0x104f8), ('\u{104d1}', 0x104f9), + ('\u{104d2}', 0x104fa), ('\u{104d3}', 0x104fb), ('\u{10570}', 0x10597), + ('\u{10571}', 0x10598), ('\u{10572}', 0x10599), ('\u{10573}', 0x1059a), + ('\u{10574}', 0x1059b), ('\u{10575}', 0x1059c), ('\u{10576}', 0x1059d), + ('\u{10577}', 0x1059e), ('\u{10578}', 0x1059f), ('\u{10579}', 0x105a0), + ('\u{1057a}', 0x105a1), ('\u{1057c}', 0x105a3), ('\u{1057d}', 0x105a4), + ('\u{1057e}', 0x105a5), ('\u{1057f}', 0x105a6), ('\u{10580}', 0x105a7), + ('\u{10581}', 0x105a8), ('\u{10582}', 0x105a9), ('\u{10583}', 0x105aa), + ('\u{10584}', 0x105ab), ('\u{10585}', 0x105ac), ('\u{10586}', 0x105ad), + ('\u{10587}', 0x105ae), ('\u{10588}', 0x105af), ('\u{10589}', 0x105b0), + ('\u{1058a}', 0x105b1), ('\u{1058c}', 0x105b3), ('\u{1058d}', 0x105b4), + ('\u{1058e}', 0x105b5), ('\u{1058f}', 0x105b6), ('\u{10590}', 0x105b7), + ('\u{10591}', 0x105b8), ('\u{10592}', 0x105b9), ('\u{10594}', 0x105bb), + ('\u{10595}', 0x105bc), ('\u{10c80}', 0x10cc0), ('\u{10c81}', 0x10cc1), + ('\u{10c82}', 0x10cc2), ('\u{10c83}', 0x10cc3), ('\u{10c84}', 0x10cc4), + ('\u{10c85}', 0x10cc5), ('\u{10c86}', 0x10cc6), ('\u{10c87}', 0x10cc7), + ('\u{10c88}', 0x10cc8), ('\u{10c89}', 0x10cc9), ('\u{10c8a}', 0x10cca), + ('\u{10c8b}', 0x10ccb), ('\u{10c8c}', 0x10ccc), ('\u{10c8d}', 0x10ccd), + ('\u{10c8e}', 0x10cce), ('\u{10c8f}', 0x10ccf), ('\u{10c90}', 0x10cd0), + ('\u{10c91}', 0x10cd1), ('\u{10c92}', 0x10cd2), ('\u{10c93}', 0x10cd3), + ('\u{10c94}', 0x10cd4), ('\u{10c95}', 0x10cd5), ('\u{10c96}', 0x10cd6), + ('\u{10c97}', 0x10cd7), ('\u{10c98}', 0x10cd8), ('\u{10c99}', 0x10cd9), + ('\u{10c9a}', 0x10cda), ('\u{10c9b}', 0x10cdb), ('\u{10c9c}', 0x10cdc), + ('\u{10c9d}', 0x10cdd), ('\u{10c9e}', 0x10cde), ('\u{10c9f}', 0x10cdf), + ('\u{10ca0}', 0x10ce0), ('\u{10ca1}', 0x10ce1), ('\u{10ca2}', 0x10ce2), + ('\u{10ca3}', 0x10ce3), ('\u{10ca4}', 0x10ce4), ('\u{10ca5}', 0x10ce5), + ('\u{10ca6}', 0x10ce6), ('\u{10ca7}', 0x10ce7), ('\u{10ca8}', 0x10ce8), + ('\u{10ca9}', 0x10ce9), ('\u{10caa}', 0x10cea), ('\u{10cab}', 0x10ceb), + ('\u{10cac}', 0x10cec), ('\u{10cad}', 0x10ced), ('\u{10cae}', 0x10cee), + ('\u{10caf}', 0x10cef), ('\u{10cb0}', 0x10cf0), ('\u{10cb1}', 0x10cf1), + ('\u{10cb2}', 0x10cf2), ('\u{10d50}', 0x10d70), ('\u{10d51}', 0x10d71), + ('\u{10d52}', 0x10d72), ('\u{10d53}', 0x10d73), ('\u{10d54}', 0x10d74), + ('\u{10d55}', 0x10d75), ('\u{10d56}', 0x10d76), ('\u{10d57}', 0x10d77), + ('\u{10d58}', 0x10d78), ('\u{10d59}', 0x10d79), ('\u{10d5a}', 0x10d7a), + ('\u{10d5b}', 0x10d7b), ('\u{10d5c}', 0x10d7c), ('\u{10d5d}', 0x10d7d), + ('\u{10d5e}', 0x10d7e), ('\u{10d5f}', 0x10d7f), ('\u{10d60}', 0x10d80), + ('\u{10d61}', 0x10d81), ('\u{10d62}', 0x10d82), ('\u{10d63}', 0x10d83), + 
('\u{10d64}', 0x10d84), ('\u{10d65}', 0x10d85), ('\u{118a0}', 0x118c0), + ('\u{118a1}', 0x118c1), ('\u{118a2}', 0x118c2), ('\u{118a3}', 0x118c3), + ('\u{118a4}', 0x118c4), ('\u{118a5}', 0x118c5), ('\u{118a6}', 0x118c6), + ('\u{118a7}', 0x118c7), ('\u{118a8}', 0x118c8), ('\u{118a9}', 0x118c9), + ('\u{118aa}', 0x118ca), ('\u{118ab}', 0x118cb), ('\u{118ac}', 0x118cc), + ('\u{118ad}', 0x118cd), ('\u{118ae}', 0x118ce), ('\u{118af}', 0x118cf), + ('\u{118b0}', 0x118d0), ('\u{118b1}', 0x118d1), ('\u{118b2}', 0x118d2), + ('\u{118b3}', 0x118d3), ('\u{118b4}', 0x118d4), ('\u{118b5}', 0x118d5), + ('\u{118b6}', 0x118d6), ('\u{118b7}', 0x118d7), ('\u{118b8}', 0x118d8), + ('\u{118b9}', 0x118d9), ('\u{118ba}', 0x118da), ('\u{118bb}', 0x118db), + ('\u{118bc}', 0x118dc), ('\u{118bd}', 0x118dd), ('\u{118be}', 0x118de), + ('\u{118bf}', 0x118df), ('\u{16e40}', 0x16e60), ('\u{16e41}', 0x16e61), + ('\u{16e42}', 0x16e62), ('\u{16e43}', 0x16e63), ('\u{16e44}', 0x16e64), + ('\u{16e45}', 0x16e65), ('\u{16e46}', 0x16e66), ('\u{16e47}', 0x16e67), + ('\u{16e48}', 0x16e68), ('\u{16e49}', 0x16e69), ('\u{16e4a}', 0x16e6a), + ('\u{16e4b}', 0x16e6b), ('\u{16e4c}', 0x16e6c), ('\u{16e4d}', 0x16e6d), + ('\u{16e4e}', 0x16e6e), ('\u{16e4f}', 0x16e6f), ('\u{16e50}', 0x16e70), + ('\u{16e51}', 0x16e71), ('\u{16e52}', 0x16e72), ('\u{16e53}', 0x16e73), + ('\u{16e54}', 0x16e74), ('\u{16e55}', 0x16e75), ('\u{16e56}', 0x16e76), + ('\u{16e57}', 0x16e77), ('\u{16e58}', 0x16e78), ('\u{16e59}', 0x16e79), + ('\u{16e5a}', 0x16e7a), ('\u{16e5b}', 0x16e7b), ('\u{16e5c}', 0x16e7c), + ('\u{16e5d}', 0x16e7d), ('\u{16e5e}', 0x16e7e), ('\u{16e5f}', 0x16e7f), + ('\u{16ea0}', 0x16ebb), ('\u{16ea1}', 0x16ebc), ('\u{16ea2}', 0x16ebd), + ('\u{16ea3}', 0x16ebe), ('\u{16ea4}', 0x16ebf), ('\u{16ea5}', 0x16ec0), + ('\u{16ea6}', 0x16ec1), ('\u{16ea7}', 0x16ec2), ('\u{16ea8}', 0x16ec3), + ('\u{16ea9}', 0x16ec4), ('\u{16eaa}', 0x16ec5), ('\u{16eab}', 0x16ec6), + ('\u{16eac}', 0x16ec7), ('\u{16ead}', 0x16ec8), ('\u{16eae}', 0x16ec9), + ('\u{16eaf}', 0x16eca), ('\u{16eb0}', 0x16ecb), ('\u{16eb1}', 0x16ecc), + ('\u{16eb2}', 0x16ecd), ('\u{16eb3}', 0x16ece), ('\u{16eb4}', 0x16ecf), + ('\u{16eb5}', 0x16ed0), ('\u{16eb6}', 0x16ed1), ('\u{16eb7}', 0x16ed2), + ('\u{16eb8}', 0x16ed3), ('\u{1e900}', 0x1e922), ('\u{1e901}', 0x1e923), + ('\u{1e902}', 0x1e924), ('\u{1e903}', 0x1e925), ('\u{1e904}', 0x1e926), + ('\u{1e905}', 0x1e927), ('\u{1e906}', 0x1e928), ('\u{1e907}', 0x1e929), + ('\u{1e908}', 0x1e92a), ('\u{1e909}', 0x1e92b), ('\u{1e90a}', 0x1e92c), + ('\u{1e90b}', 0x1e92d), ('\u{1e90c}', 0x1e92e), ('\u{1e90d}', 0x1e92f), + ('\u{1e90e}', 0x1e930), ('\u{1e90f}', 0x1e931), ('\u{1e910}', 0x1e932), + ('\u{1e911}', 0x1e933), ('\u{1e912}', 0x1e934), ('\u{1e913}', 0x1e935), + ('\u{1e914}', 0x1e936), ('\u{1e915}', 0x1e937), ('\u{1e916}', 0x1e938), + ('\u{1e917}', 0x1e939), ('\u{1e918}', 0x1e93a), ('\u{1e919}', 0x1e93b), + ('\u{1e91a}', 0x1e93c), ('\u{1e91b}', 0x1e93d), ('\u{1e91c}', 0x1e93e), + ('\u{1e91d}', 0x1e93f), ('\u{1e91e}', 0x1e940), ('\u{1e91f}', 0x1e941), + ('\u{1e920}', 0x1e942), ('\u{1e921}', 0x1e943), + ]; + + #[rustfmt::skip] + static LOWERCASE_TABLE_MULTI: &[[char; 3]; 1] = &[ + ['\u{69}', '\u{307}', '\u{0}'], + ]; + #[inline] pub fn to_lower(c: char) -> [char; 3] { const { @@ -781,367 +1187,483 @@ pub mod conversions { ) } } + #[rustfmt::skip] - static LOWERCASE_TABLE: &[(char, u32); 1462] = &[ - ('\u{c0}', 224), ('\u{c1}', 225), ('\u{c2}', 226), ('\u{c3}', 227), ('\u{c4}', 228), - ('\u{c5}', 229), ('\u{c6}', 230), ('\u{c7}', 231), ('\u{c8}', 232), ('\u{c9}', 
233), - ('\u{ca}', 234), ('\u{cb}', 235), ('\u{cc}', 236), ('\u{cd}', 237), ('\u{ce}', 238), - ('\u{cf}', 239), ('\u{d0}', 240), ('\u{d1}', 241), ('\u{d2}', 242), ('\u{d3}', 243), - ('\u{d4}', 244), ('\u{d5}', 245), ('\u{d6}', 246), ('\u{d8}', 248), ('\u{d9}', 249), - ('\u{da}', 250), ('\u{db}', 251), ('\u{dc}', 252), ('\u{dd}', 253), ('\u{de}', 254), - ('\u{100}', 257), ('\u{102}', 259), ('\u{104}', 261), ('\u{106}', 263), ('\u{108}', 265), - ('\u{10a}', 267), ('\u{10c}', 269), ('\u{10e}', 271), ('\u{110}', 273), ('\u{112}', 275), - ('\u{114}', 277), ('\u{116}', 279), ('\u{118}', 281), ('\u{11a}', 283), ('\u{11c}', 285), - ('\u{11e}', 287), ('\u{120}', 289), ('\u{122}', 291), ('\u{124}', 293), ('\u{126}', 295), - ('\u{128}', 297), ('\u{12a}', 299), ('\u{12c}', 301), ('\u{12e}', 303), - ('\u{130}', 4194304), ('\u{132}', 307), ('\u{134}', 309), ('\u{136}', 311), - ('\u{139}', 314), ('\u{13b}', 316), ('\u{13d}', 318), ('\u{13f}', 320), ('\u{141}', 322), - ('\u{143}', 324), ('\u{145}', 326), ('\u{147}', 328), ('\u{14a}', 331), ('\u{14c}', 333), - ('\u{14e}', 335), ('\u{150}', 337), ('\u{152}', 339), ('\u{154}', 341), ('\u{156}', 343), - ('\u{158}', 345), ('\u{15a}', 347), ('\u{15c}', 349), ('\u{15e}', 351), ('\u{160}', 353), - ('\u{162}', 355), ('\u{164}', 357), ('\u{166}', 359), ('\u{168}', 361), ('\u{16a}', 363), - ('\u{16c}', 365), ('\u{16e}', 367), ('\u{170}', 369), ('\u{172}', 371), ('\u{174}', 373), - ('\u{176}', 375), ('\u{178}', 255), ('\u{179}', 378), ('\u{17b}', 380), ('\u{17d}', 382), - ('\u{181}', 595), ('\u{182}', 387), ('\u{184}', 389), ('\u{186}', 596), ('\u{187}', 392), - ('\u{189}', 598), ('\u{18a}', 599), ('\u{18b}', 396), ('\u{18e}', 477), ('\u{18f}', 601), - ('\u{190}', 603), ('\u{191}', 402), ('\u{193}', 608), ('\u{194}', 611), ('\u{196}', 617), - ('\u{197}', 616), ('\u{198}', 409), ('\u{19c}', 623), ('\u{19d}', 626), ('\u{19f}', 629), - ('\u{1a0}', 417), ('\u{1a2}', 419), ('\u{1a4}', 421), ('\u{1a6}', 640), ('\u{1a7}', 424), - ('\u{1a9}', 643), ('\u{1ac}', 429), ('\u{1ae}', 648), ('\u{1af}', 432), ('\u{1b1}', 650), - ('\u{1b2}', 651), ('\u{1b3}', 436), ('\u{1b5}', 438), ('\u{1b7}', 658), ('\u{1b8}', 441), - ('\u{1bc}', 445), ('\u{1c4}', 454), ('\u{1c5}', 454), ('\u{1c7}', 457), ('\u{1c8}', 457), - ('\u{1ca}', 460), ('\u{1cb}', 460), ('\u{1cd}', 462), ('\u{1cf}', 464), ('\u{1d1}', 466), - ('\u{1d3}', 468), ('\u{1d5}', 470), ('\u{1d7}', 472), ('\u{1d9}', 474), ('\u{1db}', 476), - ('\u{1de}', 479), ('\u{1e0}', 481), ('\u{1e2}', 483), ('\u{1e4}', 485), ('\u{1e6}', 487), - ('\u{1e8}', 489), ('\u{1ea}', 491), ('\u{1ec}', 493), ('\u{1ee}', 495), ('\u{1f1}', 499), - ('\u{1f2}', 499), ('\u{1f4}', 501), ('\u{1f6}', 405), ('\u{1f7}', 447), ('\u{1f8}', 505), - ('\u{1fa}', 507), ('\u{1fc}', 509), ('\u{1fe}', 511), ('\u{200}', 513), ('\u{202}', 515), - ('\u{204}', 517), ('\u{206}', 519), ('\u{208}', 521), ('\u{20a}', 523), ('\u{20c}', 525), - ('\u{20e}', 527), ('\u{210}', 529), ('\u{212}', 531), ('\u{214}', 533), ('\u{216}', 535), - ('\u{218}', 537), ('\u{21a}', 539), ('\u{21c}', 541), ('\u{21e}', 543), ('\u{220}', 414), - ('\u{222}', 547), ('\u{224}', 549), ('\u{226}', 551), ('\u{228}', 553), ('\u{22a}', 555), - ('\u{22c}', 557), ('\u{22e}', 559), ('\u{230}', 561), ('\u{232}', 563), ('\u{23a}', 11365), - ('\u{23b}', 572), ('\u{23d}', 410), ('\u{23e}', 11366), ('\u{241}', 578), ('\u{243}', 384), - ('\u{244}', 649), ('\u{245}', 652), ('\u{246}', 583), ('\u{248}', 585), ('\u{24a}', 587), - ('\u{24c}', 589), ('\u{24e}', 591), ('\u{370}', 881), ('\u{372}', 883), ('\u{376}', 887), - 
('\u{37f}', 1011), ('\u{386}', 940), ('\u{388}', 941), ('\u{389}', 942), ('\u{38a}', 943), - ('\u{38c}', 972), ('\u{38e}', 973), ('\u{38f}', 974), ('\u{391}', 945), ('\u{392}', 946), - ('\u{393}', 947), ('\u{394}', 948), ('\u{395}', 949), ('\u{396}', 950), ('\u{397}', 951), - ('\u{398}', 952), ('\u{399}', 953), ('\u{39a}', 954), ('\u{39b}', 955), ('\u{39c}', 956), - ('\u{39d}', 957), ('\u{39e}', 958), ('\u{39f}', 959), ('\u{3a0}', 960), ('\u{3a1}', 961), - ('\u{3a3}', 963), ('\u{3a4}', 964), ('\u{3a5}', 965), ('\u{3a6}', 966), ('\u{3a7}', 967), - ('\u{3a8}', 968), ('\u{3a9}', 969), ('\u{3aa}', 970), ('\u{3ab}', 971), ('\u{3cf}', 983), - ('\u{3d8}', 985), ('\u{3da}', 987), ('\u{3dc}', 989), ('\u{3de}', 991), ('\u{3e0}', 993), - ('\u{3e2}', 995), ('\u{3e4}', 997), ('\u{3e6}', 999), ('\u{3e8}', 1001), ('\u{3ea}', 1003), - ('\u{3ec}', 1005), ('\u{3ee}', 1007), ('\u{3f4}', 952), ('\u{3f7}', 1016), - ('\u{3f9}', 1010), ('\u{3fa}', 1019), ('\u{3fd}', 891), ('\u{3fe}', 892), ('\u{3ff}', 893), - ('\u{400}', 1104), ('\u{401}', 1105), ('\u{402}', 1106), ('\u{403}', 1107), - ('\u{404}', 1108), ('\u{405}', 1109), ('\u{406}', 1110), ('\u{407}', 1111), - ('\u{408}', 1112), ('\u{409}', 1113), ('\u{40a}', 1114), ('\u{40b}', 1115), - ('\u{40c}', 1116), ('\u{40d}', 1117), ('\u{40e}', 1118), ('\u{40f}', 1119), - ('\u{410}', 1072), ('\u{411}', 1073), ('\u{412}', 1074), ('\u{413}', 1075), - ('\u{414}', 1076), ('\u{415}', 1077), ('\u{416}', 1078), ('\u{417}', 1079), - ('\u{418}', 1080), ('\u{419}', 1081), ('\u{41a}', 1082), ('\u{41b}', 1083), - ('\u{41c}', 1084), ('\u{41d}', 1085), ('\u{41e}', 1086), ('\u{41f}', 1087), - ('\u{420}', 1088), ('\u{421}', 1089), ('\u{422}', 1090), ('\u{423}', 1091), - ('\u{424}', 1092), ('\u{425}', 1093), ('\u{426}', 1094), ('\u{427}', 1095), - ('\u{428}', 1096), ('\u{429}', 1097), ('\u{42a}', 1098), ('\u{42b}', 1099), - ('\u{42c}', 1100), ('\u{42d}', 1101), ('\u{42e}', 1102), ('\u{42f}', 1103), - ('\u{460}', 1121), ('\u{462}', 1123), ('\u{464}', 1125), ('\u{466}', 1127), - ('\u{468}', 1129), ('\u{46a}', 1131), ('\u{46c}', 1133), ('\u{46e}', 1135), - ('\u{470}', 1137), ('\u{472}', 1139), ('\u{474}', 1141), ('\u{476}', 1143), - ('\u{478}', 1145), ('\u{47a}', 1147), ('\u{47c}', 1149), ('\u{47e}', 1151), - ('\u{480}', 1153), ('\u{48a}', 1163), ('\u{48c}', 1165), ('\u{48e}', 1167), - ('\u{490}', 1169), ('\u{492}', 1171), ('\u{494}', 1173), ('\u{496}', 1175), - ('\u{498}', 1177), ('\u{49a}', 1179), ('\u{49c}', 1181), ('\u{49e}', 1183), - ('\u{4a0}', 1185), ('\u{4a2}', 1187), ('\u{4a4}', 1189), ('\u{4a6}', 1191), - ('\u{4a8}', 1193), ('\u{4aa}', 1195), ('\u{4ac}', 1197), ('\u{4ae}', 1199), - ('\u{4b0}', 1201), ('\u{4b2}', 1203), ('\u{4b4}', 1205), ('\u{4b6}', 1207), - ('\u{4b8}', 1209), ('\u{4ba}', 1211), ('\u{4bc}', 1213), ('\u{4be}', 1215), - ('\u{4c0}', 1231), ('\u{4c1}', 1218), ('\u{4c3}', 1220), ('\u{4c5}', 1222), - ('\u{4c7}', 1224), ('\u{4c9}', 1226), ('\u{4cb}', 1228), ('\u{4cd}', 1230), - ('\u{4d0}', 1233), ('\u{4d2}', 1235), ('\u{4d4}', 1237), ('\u{4d6}', 1239), - ('\u{4d8}', 1241), ('\u{4da}', 1243), ('\u{4dc}', 1245), ('\u{4de}', 1247), - ('\u{4e0}', 1249), ('\u{4e2}', 1251), ('\u{4e4}', 1253), ('\u{4e6}', 1255), - ('\u{4e8}', 1257), ('\u{4ea}', 1259), ('\u{4ec}', 1261), ('\u{4ee}', 1263), - ('\u{4f0}', 1265), ('\u{4f2}', 1267), ('\u{4f4}', 1269), ('\u{4f6}', 1271), - ('\u{4f8}', 1273), ('\u{4fa}', 1275), ('\u{4fc}', 1277), ('\u{4fe}', 1279), - ('\u{500}', 1281), ('\u{502}', 1283), ('\u{504}', 1285), ('\u{506}', 1287), - ('\u{508}', 1289), ('\u{50a}', 1291), ('\u{50c}', 1293), 
('\u{50e}', 1295), - ('\u{510}', 1297), ('\u{512}', 1299), ('\u{514}', 1301), ('\u{516}', 1303), - ('\u{518}', 1305), ('\u{51a}', 1307), ('\u{51c}', 1309), ('\u{51e}', 1311), - ('\u{520}', 1313), ('\u{522}', 1315), ('\u{524}', 1317), ('\u{526}', 1319), - ('\u{528}', 1321), ('\u{52a}', 1323), ('\u{52c}', 1325), ('\u{52e}', 1327), - ('\u{531}', 1377), ('\u{532}', 1378), ('\u{533}', 1379), ('\u{534}', 1380), - ('\u{535}', 1381), ('\u{536}', 1382), ('\u{537}', 1383), ('\u{538}', 1384), - ('\u{539}', 1385), ('\u{53a}', 1386), ('\u{53b}', 1387), ('\u{53c}', 1388), - ('\u{53d}', 1389), ('\u{53e}', 1390), ('\u{53f}', 1391), ('\u{540}', 1392), - ('\u{541}', 1393), ('\u{542}', 1394), ('\u{543}', 1395), ('\u{544}', 1396), - ('\u{545}', 1397), ('\u{546}', 1398), ('\u{547}', 1399), ('\u{548}', 1400), - ('\u{549}', 1401), ('\u{54a}', 1402), ('\u{54b}', 1403), ('\u{54c}', 1404), - ('\u{54d}', 1405), ('\u{54e}', 1406), ('\u{54f}', 1407), ('\u{550}', 1408), - ('\u{551}', 1409), ('\u{552}', 1410), ('\u{553}', 1411), ('\u{554}', 1412), - ('\u{555}', 1413), ('\u{556}', 1414), ('\u{10a0}', 11520), ('\u{10a1}', 11521), - ('\u{10a2}', 11522), ('\u{10a3}', 11523), ('\u{10a4}', 11524), ('\u{10a5}', 11525), - ('\u{10a6}', 11526), ('\u{10a7}', 11527), ('\u{10a8}', 11528), ('\u{10a9}', 11529), - ('\u{10aa}', 11530), ('\u{10ab}', 11531), ('\u{10ac}', 11532), ('\u{10ad}', 11533), - ('\u{10ae}', 11534), ('\u{10af}', 11535), ('\u{10b0}', 11536), ('\u{10b1}', 11537), - ('\u{10b2}', 11538), ('\u{10b3}', 11539), ('\u{10b4}', 11540), ('\u{10b5}', 11541), - ('\u{10b6}', 11542), ('\u{10b7}', 11543), ('\u{10b8}', 11544), ('\u{10b9}', 11545), - ('\u{10ba}', 11546), ('\u{10bb}', 11547), ('\u{10bc}', 11548), ('\u{10bd}', 11549), - ('\u{10be}', 11550), ('\u{10bf}', 11551), ('\u{10c0}', 11552), ('\u{10c1}', 11553), - ('\u{10c2}', 11554), ('\u{10c3}', 11555), ('\u{10c4}', 11556), ('\u{10c5}', 11557), - ('\u{10c7}', 11559), ('\u{10cd}', 11565), ('\u{13a0}', 43888), ('\u{13a1}', 43889), - ('\u{13a2}', 43890), ('\u{13a3}', 43891), ('\u{13a4}', 43892), ('\u{13a5}', 43893), - ('\u{13a6}', 43894), ('\u{13a7}', 43895), ('\u{13a8}', 43896), ('\u{13a9}', 43897), - ('\u{13aa}', 43898), ('\u{13ab}', 43899), ('\u{13ac}', 43900), ('\u{13ad}', 43901), - ('\u{13ae}', 43902), ('\u{13af}', 43903), ('\u{13b0}', 43904), ('\u{13b1}', 43905), - ('\u{13b2}', 43906), ('\u{13b3}', 43907), ('\u{13b4}', 43908), ('\u{13b5}', 43909), - ('\u{13b6}', 43910), ('\u{13b7}', 43911), ('\u{13b8}', 43912), ('\u{13b9}', 43913), - ('\u{13ba}', 43914), ('\u{13bb}', 43915), ('\u{13bc}', 43916), ('\u{13bd}', 43917), - ('\u{13be}', 43918), ('\u{13bf}', 43919), ('\u{13c0}', 43920), ('\u{13c1}', 43921), - ('\u{13c2}', 43922), ('\u{13c3}', 43923), ('\u{13c4}', 43924), ('\u{13c5}', 43925), - ('\u{13c6}', 43926), ('\u{13c7}', 43927), ('\u{13c8}', 43928), ('\u{13c9}', 43929), - ('\u{13ca}', 43930), ('\u{13cb}', 43931), ('\u{13cc}', 43932), ('\u{13cd}', 43933), - ('\u{13ce}', 43934), ('\u{13cf}', 43935), ('\u{13d0}', 43936), ('\u{13d1}', 43937), - ('\u{13d2}', 43938), ('\u{13d3}', 43939), ('\u{13d4}', 43940), ('\u{13d5}', 43941), - ('\u{13d6}', 43942), ('\u{13d7}', 43943), ('\u{13d8}', 43944), ('\u{13d9}', 43945), - ('\u{13da}', 43946), ('\u{13db}', 43947), ('\u{13dc}', 43948), ('\u{13dd}', 43949), - ('\u{13de}', 43950), ('\u{13df}', 43951), ('\u{13e0}', 43952), ('\u{13e1}', 43953), - ('\u{13e2}', 43954), ('\u{13e3}', 43955), ('\u{13e4}', 43956), ('\u{13e5}', 43957), - ('\u{13e6}', 43958), ('\u{13e7}', 43959), ('\u{13e8}', 43960), ('\u{13e9}', 43961), - ('\u{13ea}', 43962), 
('\u{13eb}', 43963), ('\u{13ec}', 43964), ('\u{13ed}', 43965), - ('\u{13ee}', 43966), ('\u{13ef}', 43967), ('\u{13f0}', 5112), ('\u{13f1}', 5113), - ('\u{13f2}', 5114), ('\u{13f3}', 5115), ('\u{13f4}', 5116), ('\u{13f5}', 5117), - ('\u{1c89}', 7306), ('\u{1c90}', 4304), ('\u{1c91}', 4305), ('\u{1c92}', 4306), - ('\u{1c93}', 4307), ('\u{1c94}', 4308), ('\u{1c95}', 4309), ('\u{1c96}', 4310), - ('\u{1c97}', 4311), ('\u{1c98}', 4312), ('\u{1c99}', 4313), ('\u{1c9a}', 4314), - ('\u{1c9b}', 4315), ('\u{1c9c}', 4316), ('\u{1c9d}', 4317), ('\u{1c9e}', 4318), - ('\u{1c9f}', 4319), ('\u{1ca0}', 4320), ('\u{1ca1}', 4321), ('\u{1ca2}', 4322), - ('\u{1ca3}', 4323), ('\u{1ca4}', 4324), ('\u{1ca5}', 4325), ('\u{1ca6}', 4326), - ('\u{1ca7}', 4327), ('\u{1ca8}', 4328), ('\u{1ca9}', 4329), ('\u{1caa}', 4330), - ('\u{1cab}', 4331), ('\u{1cac}', 4332), ('\u{1cad}', 4333), ('\u{1cae}', 4334), - ('\u{1caf}', 4335), ('\u{1cb0}', 4336), ('\u{1cb1}', 4337), ('\u{1cb2}', 4338), - ('\u{1cb3}', 4339), ('\u{1cb4}', 4340), ('\u{1cb5}', 4341), ('\u{1cb6}', 4342), - ('\u{1cb7}', 4343), ('\u{1cb8}', 4344), ('\u{1cb9}', 4345), ('\u{1cba}', 4346), - ('\u{1cbd}', 4349), ('\u{1cbe}', 4350), ('\u{1cbf}', 4351), ('\u{1e00}', 7681), - ('\u{1e02}', 7683), ('\u{1e04}', 7685), ('\u{1e06}', 7687), ('\u{1e08}', 7689), - ('\u{1e0a}', 7691), ('\u{1e0c}', 7693), ('\u{1e0e}', 7695), ('\u{1e10}', 7697), - ('\u{1e12}', 7699), ('\u{1e14}', 7701), ('\u{1e16}', 7703), ('\u{1e18}', 7705), - ('\u{1e1a}', 7707), ('\u{1e1c}', 7709), ('\u{1e1e}', 7711), ('\u{1e20}', 7713), - ('\u{1e22}', 7715), ('\u{1e24}', 7717), ('\u{1e26}', 7719), ('\u{1e28}', 7721), - ('\u{1e2a}', 7723), ('\u{1e2c}', 7725), ('\u{1e2e}', 7727), ('\u{1e30}', 7729), - ('\u{1e32}', 7731), ('\u{1e34}', 7733), ('\u{1e36}', 7735), ('\u{1e38}', 7737), - ('\u{1e3a}', 7739), ('\u{1e3c}', 7741), ('\u{1e3e}', 7743), ('\u{1e40}', 7745), - ('\u{1e42}', 7747), ('\u{1e44}', 7749), ('\u{1e46}', 7751), ('\u{1e48}', 7753), - ('\u{1e4a}', 7755), ('\u{1e4c}', 7757), ('\u{1e4e}', 7759), ('\u{1e50}', 7761), - ('\u{1e52}', 7763), ('\u{1e54}', 7765), ('\u{1e56}', 7767), ('\u{1e58}', 7769), - ('\u{1e5a}', 7771), ('\u{1e5c}', 7773), ('\u{1e5e}', 7775), ('\u{1e60}', 7777), - ('\u{1e62}', 7779), ('\u{1e64}', 7781), ('\u{1e66}', 7783), ('\u{1e68}', 7785), - ('\u{1e6a}', 7787), ('\u{1e6c}', 7789), ('\u{1e6e}', 7791), ('\u{1e70}', 7793), - ('\u{1e72}', 7795), ('\u{1e74}', 7797), ('\u{1e76}', 7799), ('\u{1e78}', 7801), - ('\u{1e7a}', 7803), ('\u{1e7c}', 7805), ('\u{1e7e}', 7807), ('\u{1e80}', 7809), - ('\u{1e82}', 7811), ('\u{1e84}', 7813), ('\u{1e86}', 7815), ('\u{1e88}', 7817), - ('\u{1e8a}', 7819), ('\u{1e8c}', 7821), ('\u{1e8e}', 7823), ('\u{1e90}', 7825), - ('\u{1e92}', 7827), ('\u{1e94}', 7829), ('\u{1e9e}', 223), ('\u{1ea0}', 7841), - ('\u{1ea2}', 7843), ('\u{1ea4}', 7845), ('\u{1ea6}', 7847), ('\u{1ea8}', 7849), - ('\u{1eaa}', 7851), ('\u{1eac}', 7853), ('\u{1eae}', 7855), ('\u{1eb0}', 7857), - ('\u{1eb2}', 7859), ('\u{1eb4}', 7861), ('\u{1eb6}', 7863), ('\u{1eb8}', 7865), - ('\u{1eba}', 7867), ('\u{1ebc}', 7869), ('\u{1ebe}', 7871), ('\u{1ec0}', 7873), - ('\u{1ec2}', 7875), ('\u{1ec4}', 7877), ('\u{1ec6}', 7879), ('\u{1ec8}', 7881), - ('\u{1eca}', 7883), ('\u{1ecc}', 7885), ('\u{1ece}', 7887), ('\u{1ed0}', 7889), - ('\u{1ed2}', 7891), ('\u{1ed4}', 7893), ('\u{1ed6}', 7895), ('\u{1ed8}', 7897), - ('\u{1eda}', 7899), ('\u{1edc}', 7901), ('\u{1ede}', 7903), ('\u{1ee0}', 7905), - ('\u{1ee2}', 7907), ('\u{1ee4}', 7909), ('\u{1ee6}', 7911), ('\u{1ee8}', 7913), - ('\u{1eea}', 7915), ('\u{1eec}', 7917), 
('\u{1eee}', 7919), ('\u{1ef0}', 7921), - ('\u{1ef2}', 7923), ('\u{1ef4}', 7925), ('\u{1ef6}', 7927), ('\u{1ef8}', 7929), - ('\u{1efa}', 7931), ('\u{1efc}', 7933), ('\u{1efe}', 7935), ('\u{1f08}', 7936), - ('\u{1f09}', 7937), ('\u{1f0a}', 7938), ('\u{1f0b}', 7939), ('\u{1f0c}', 7940), - ('\u{1f0d}', 7941), ('\u{1f0e}', 7942), ('\u{1f0f}', 7943), ('\u{1f18}', 7952), - ('\u{1f19}', 7953), ('\u{1f1a}', 7954), ('\u{1f1b}', 7955), ('\u{1f1c}', 7956), - ('\u{1f1d}', 7957), ('\u{1f28}', 7968), ('\u{1f29}', 7969), ('\u{1f2a}', 7970), - ('\u{1f2b}', 7971), ('\u{1f2c}', 7972), ('\u{1f2d}', 7973), ('\u{1f2e}', 7974), - ('\u{1f2f}', 7975), ('\u{1f38}', 7984), ('\u{1f39}', 7985), ('\u{1f3a}', 7986), - ('\u{1f3b}', 7987), ('\u{1f3c}', 7988), ('\u{1f3d}', 7989), ('\u{1f3e}', 7990), - ('\u{1f3f}', 7991), ('\u{1f48}', 8000), ('\u{1f49}', 8001), ('\u{1f4a}', 8002), - ('\u{1f4b}', 8003), ('\u{1f4c}', 8004), ('\u{1f4d}', 8005), ('\u{1f59}', 8017), - ('\u{1f5b}', 8019), ('\u{1f5d}', 8021), ('\u{1f5f}', 8023), ('\u{1f68}', 8032), - ('\u{1f69}', 8033), ('\u{1f6a}', 8034), ('\u{1f6b}', 8035), ('\u{1f6c}', 8036), - ('\u{1f6d}', 8037), ('\u{1f6e}', 8038), ('\u{1f6f}', 8039), ('\u{1f88}', 8064), - ('\u{1f89}', 8065), ('\u{1f8a}', 8066), ('\u{1f8b}', 8067), ('\u{1f8c}', 8068), - ('\u{1f8d}', 8069), ('\u{1f8e}', 8070), ('\u{1f8f}', 8071), ('\u{1f98}', 8080), - ('\u{1f99}', 8081), ('\u{1f9a}', 8082), ('\u{1f9b}', 8083), ('\u{1f9c}', 8084), - ('\u{1f9d}', 8085), ('\u{1f9e}', 8086), ('\u{1f9f}', 8087), ('\u{1fa8}', 8096), - ('\u{1fa9}', 8097), ('\u{1faa}', 8098), ('\u{1fab}', 8099), ('\u{1fac}', 8100), - ('\u{1fad}', 8101), ('\u{1fae}', 8102), ('\u{1faf}', 8103), ('\u{1fb8}', 8112), - ('\u{1fb9}', 8113), ('\u{1fba}', 8048), ('\u{1fbb}', 8049), ('\u{1fbc}', 8115), - ('\u{1fc8}', 8050), ('\u{1fc9}', 8051), ('\u{1fca}', 8052), ('\u{1fcb}', 8053), - ('\u{1fcc}', 8131), ('\u{1fd8}', 8144), ('\u{1fd9}', 8145), ('\u{1fda}', 8054), - ('\u{1fdb}', 8055), ('\u{1fe8}', 8160), ('\u{1fe9}', 8161), ('\u{1fea}', 8058), - ('\u{1feb}', 8059), ('\u{1fec}', 8165), ('\u{1ff8}', 8056), ('\u{1ff9}', 8057), - ('\u{1ffa}', 8060), ('\u{1ffb}', 8061), ('\u{1ffc}', 8179), ('\u{2126}', 969), - ('\u{212a}', 107), ('\u{212b}', 229), ('\u{2132}', 8526), ('\u{2160}', 8560), - ('\u{2161}', 8561), ('\u{2162}', 8562), ('\u{2163}', 8563), ('\u{2164}', 8564), - ('\u{2165}', 8565), ('\u{2166}', 8566), ('\u{2167}', 8567), ('\u{2168}', 8568), - ('\u{2169}', 8569), ('\u{216a}', 8570), ('\u{216b}', 8571), ('\u{216c}', 8572), - ('\u{216d}', 8573), ('\u{216e}', 8574), ('\u{216f}', 8575), ('\u{2183}', 8580), - ('\u{24b6}', 9424), ('\u{24b7}', 9425), ('\u{24b8}', 9426), ('\u{24b9}', 9427), - ('\u{24ba}', 9428), ('\u{24bb}', 9429), ('\u{24bc}', 9430), ('\u{24bd}', 9431), - ('\u{24be}', 9432), ('\u{24bf}', 9433), ('\u{24c0}', 9434), ('\u{24c1}', 9435), - ('\u{24c2}', 9436), ('\u{24c3}', 9437), ('\u{24c4}', 9438), ('\u{24c5}', 9439), - ('\u{24c6}', 9440), ('\u{24c7}', 9441), ('\u{24c8}', 9442), ('\u{24c9}', 9443), - ('\u{24ca}', 9444), ('\u{24cb}', 9445), ('\u{24cc}', 9446), ('\u{24cd}', 9447), - ('\u{24ce}', 9448), ('\u{24cf}', 9449), ('\u{2c00}', 11312), ('\u{2c01}', 11313), - ('\u{2c02}', 11314), ('\u{2c03}', 11315), ('\u{2c04}', 11316), ('\u{2c05}', 11317), - ('\u{2c06}', 11318), ('\u{2c07}', 11319), ('\u{2c08}', 11320), ('\u{2c09}', 11321), - ('\u{2c0a}', 11322), ('\u{2c0b}', 11323), ('\u{2c0c}', 11324), ('\u{2c0d}', 11325), - ('\u{2c0e}', 11326), ('\u{2c0f}', 11327), ('\u{2c10}', 11328), ('\u{2c11}', 11329), - ('\u{2c12}', 11330), ('\u{2c13}', 11331), 
('\u{2c14}', 11332), ('\u{2c15}', 11333), - ('\u{2c16}', 11334), ('\u{2c17}', 11335), ('\u{2c18}', 11336), ('\u{2c19}', 11337), - ('\u{2c1a}', 11338), ('\u{2c1b}', 11339), ('\u{2c1c}', 11340), ('\u{2c1d}', 11341), - ('\u{2c1e}', 11342), ('\u{2c1f}', 11343), ('\u{2c20}', 11344), ('\u{2c21}', 11345), - ('\u{2c22}', 11346), ('\u{2c23}', 11347), ('\u{2c24}', 11348), ('\u{2c25}', 11349), - ('\u{2c26}', 11350), ('\u{2c27}', 11351), ('\u{2c28}', 11352), ('\u{2c29}', 11353), - ('\u{2c2a}', 11354), ('\u{2c2b}', 11355), ('\u{2c2c}', 11356), ('\u{2c2d}', 11357), - ('\u{2c2e}', 11358), ('\u{2c2f}', 11359), ('\u{2c60}', 11361), ('\u{2c62}', 619), - ('\u{2c63}', 7549), ('\u{2c64}', 637), ('\u{2c67}', 11368), ('\u{2c69}', 11370), - ('\u{2c6b}', 11372), ('\u{2c6d}', 593), ('\u{2c6e}', 625), ('\u{2c6f}', 592), - ('\u{2c70}', 594), ('\u{2c72}', 11379), ('\u{2c75}', 11382), ('\u{2c7e}', 575), - ('\u{2c7f}', 576), ('\u{2c80}', 11393), ('\u{2c82}', 11395), ('\u{2c84}', 11397), - ('\u{2c86}', 11399), ('\u{2c88}', 11401), ('\u{2c8a}', 11403), ('\u{2c8c}', 11405), - ('\u{2c8e}', 11407), ('\u{2c90}', 11409), ('\u{2c92}', 11411), ('\u{2c94}', 11413), - ('\u{2c96}', 11415), ('\u{2c98}', 11417), ('\u{2c9a}', 11419), ('\u{2c9c}', 11421), - ('\u{2c9e}', 11423), ('\u{2ca0}', 11425), ('\u{2ca2}', 11427), ('\u{2ca4}', 11429), - ('\u{2ca6}', 11431), ('\u{2ca8}', 11433), ('\u{2caa}', 11435), ('\u{2cac}', 11437), - ('\u{2cae}', 11439), ('\u{2cb0}', 11441), ('\u{2cb2}', 11443), ('\u{2cb4}', 11445), - ('\u{2cb6}', 11447), ('\u{2cb8}', 11449), ('\u{2cba}', 11451), ('\u{2cbc}', 11453), - ('\u{2cbe}', 11455), ('\u{2cc0}', 11457), ('\u{2cc2}', 11459), ('\u{2cc4}', 11461), - ('\u{2cc6}', 11463), ('\u{2cc8}', 11465), ('\u{2cca}', 11467), ('\u{2ccc}', 11469), - ('\u{2cce}', 11471), ('\u{2cd0}', 11473), ('\u{2cd2}', 11475), ('\u{2cd4}', 11477), - ('\u{2cd6}', 11479), ('\u{2cd8}', 11481), ('\u{2cda}', 11483), ('\u{2cdc}', 11485), - ('\u{2cde}', 11487), ('\u{2ce0}', 11489), ('\u{2ce2}', 11491), ('\u{2ceb}', 11500), - ('\u{2ced}', 11502), ('\u{2cf2}', 11507), ('\u{a640}', 42561), ('\u{a642}', 42563), - ('\u{a644}', 42565), ('\u{a646}', 42567), ('\u{a648}', 42569), ('\u{a64a}', 42571), - ('\u{a64c}', 42573), ('\u{a64e}', 42575), ('\u{a650}', 42577), ('\u{a652}', 42579), - ('\u{a654}', 42581), ('\u{a656}', 42583), ('\u{a658}', 42585), ('\u{a65a}', 42587), - ('\u{a65c}', 42589), ('\u{a65e}', 42591), ('\u{a660}', 42593), ('\u{a662}', 42595), - ('\u{a664}', 42597), ('\u{a666}', 42599), ('\u{a668}', 42601), ('\u{a66a}', 42603), - ('\u{a66c}', 42605), ('\u{a680}', 42625), ('\u{a682}', 42627), ('\u{a684}', 42629), - ('\u{a686}', 42631), ('\u{a688}', 42633), ('\u{a68a}', 42635), ('\u{a68c}', 42637), - ('\u{a68e}', 42639), ('\u{a690}', 42641), ('\u{a692}', 42643), ('\u{a694}', 42645), - ('\u{a696}', 42647), ('\u{a698}', 42649), ('\u{a69a}', 42651), ('\u{a722}', 42787), - ('\u{a724}', 42789), ('\u{a726}', 42791), ('\u{a728}', 42793), ('\u{a72a}', 42795), - ('\u{a72c}', 42797), ('\u{a72e}', 42799), ('\u{a732}', 42803), ('\u{a734}', 42805), - ('\u{a736}', 42807), ('\u{a738}', 42809), ('\u{a73a}', 42811), ('\u{a73c}', 42813), - ('\u{a73e}', 42815), ('\u{a740}', 42817), ('\u{a742}', 42819), ('\u{a744}', 42821), - ('\u{a746}', 42823), ('\u{a748}', 42825), ('\u{a74a}', 42827), ('\u{a74c}', 42829), - ('\u{a74e}', 42831), ('\u{a750}', 42833), ('\u{a752}', 42835), ('\u{a754}', 42837), - ('\u{a756}', 42839), ('\u{a758}', 42841), ('\u{a75a}', 42843), ('\u{a75c}', 42845), - ('\u{a75e}', 42847), ('\u{a760}', 42849), ('\u{a762}', 42851), ('\u{a764}', 42853), - 
('\u{a766}', 42855), ('\u{a768}', 42857), ('\u{a76a}', 42859), ('\u{a76c}', 42861), - ('\u{a76e}', 42863), ('\u{a779}', 42874), ('\u{a77b}', 42876), ('\u{a77d}', 7545), - ('\u{a77e}', 42879), ('\u{a780}', 42881), ('\u{a782}', 42883), ('\u{a784}', 42885), - ('\u{a786}', 42887), ('\u{a78b}', 42892), ('\u{a78d}', 613), ('\u{a790}', 42897), - ('\u{a792}', 42899), ('\u{a796}', 42903), ('\u{a798}', 42905), ('\u{a79a}', 42907), - ('\u{a79c}', 42909), ('\u{a79e}', 42911), ('\u{a7a0}', 42913), ('\u{a7a2}', 42915), - ('\u{a7a4}', 42917), ('\u{a7a6}', 42919), ('\u{a7a8}', 42921), ('\u{a7aa}', 614), - ('\u{a7ab}', 604), ('\u{a7ac}', 609), ('\u{a7ad}', 620), ('\u{a7ae}', 618), - ('\u{a7b0}', 670), ('\u{a7b1}', 647), ('\u{a7b2}', 669), ('\u{a7b3}', 43859), - ('\u{a7b4}', 42933), ('\u{a7b6}', 42935), ('\u{a7b8}', 42937), ('\u{a7ba}', 42939), - ('\u{a7bc}', 42941), ('\u{a7be}', 42943), ('\u{a7c0}', 42945), ('\u{a7c2}', 42947), - ('\u{a7c4}', 42900), ('\u{a7c5}', 642), ('\u{a7c6}', 7566), ('\u{a7c7}', 42952), - ('\u{a7c9}', 42954), ('\u{a7cb}', 612), ('\u{a7cc}', 42957), ('\u{a7ce}', 42959), - ('\u{a7d0}', 42961), ('\u{a7d2}', 42963), ('\u{a7d4}', 42965), ('\u{a7d6}', 42967), - ('\u{a7d8}', 42969), ('\u{a7da}', 42971), ('\u{a7dc}', 411), ('\u{a7f5}', 42998), - ('\u{ff21}', 65345), ('\u{ff22}', 65346), ('\u{ff23}', 65347), ('\u{ff24}', 65348), - ('\u{ff25}', 65349), ('\u{ff26}', 65350), ('\u{ff27}', 65351), ('\u{ff28}', 65352), - ('\u{ff29}', 65353), ('\u{ff2a}', 65354), ('\u{ff2b}', 65355), ('\u{ff2c}', 65356), - ('\u{ff2d}', 65357), ('\u{ff2e}', 65358), ('\u{ff2f}', 65359), ('\u{ff30}', 65360), - ('\u{ff31}', 65361), ('\u{ff32}', 65362), ('\u{ff33}', 65363), ('\u{ff34}', 65364), - ('\u{ff35}', 65365), ('\u{ff36}', 65366), ('\u{ff37}', 65367), ('\u{ff38}', 65368), - ('\u{ff39}', 65369), ('\u{ff3a}', 65370), ('\u{10400}', 66600), ('\u{10401}', 66601), - ('\u{10402}', 66602), ('\u{10403}', 66603), ('\u{10404}', 66604), ('\u{10405}', 66605), - ('\u{10406}', 66606), ('\u{10407}', 66607), ('\u{10408}', 66608), ('\u{10409}', 66609), - ('\u{1040a}', 66610), ('\u{1040b}', 66611), ('\u{1040c}', 66612), ('\u{1040d}', 66613), - ('\u{1040e}', 66614), ('\u{1040f}', 66615), ('\u{10410}', 66616), ('\u{10411}', 66617), - ('\u{10412}', 66618), ('\u{10413}', 66619), ('\u{10414}', 66620), ('\u{10415}', 66621), - ('\u{10416}', 66622), ('\u{10417}', 66623), ('\u{10418}', 66624), ('\u{10419}', 66625), - ('\u{1041a}', 66626), ('\u{1041b}', 66627), ('\u{1041c}', 66628), ('\u{1041d}', 66629), - ('\u{1041e}', 66630), ('\u{1041f}', 66631), ('\u{10420}', 66632), ('\u{10421}', 66633), - ('\u{10422}', 66634), ('\u{10423}', 66635), ('\u{10424}', 66636), ('\u{10425}', 66637), - ('\u{10426}', 66638), ('\u{10427}', 66639), ('\u{104b0}', 66776), ('\u{104b1}', 66777), - ('\u{104b2}', 66778), ('\u{104b3}', 66779), ('\u{104b4}', 66780), ('\u{104b5}', 66781), - ('\u{104b6}', 66782), ('\u{104b7}', 66783), ('\u{104b8}', 66784), ('\u{104b9}', 66785), - ('\u{104ba}', 66786), ('\u{104bb}', 66787), ('\u{104bc}', 66788), ('\u{104bd}', 66789), - ('\u{104be}', 66790), ('\u{104bf}', 66791), ('\u{104c0}', 66792), ('\u{104c1}', 66793), - ('\u{104c2}', 66794), ('\u{104c3}', 66795), ('\u{104c4}', 66796), ('\u{104c5}', 66797), - ('\u{104c6}', 66798), ('\u{104c7}', 66799), ('\u{104c8}', 66800), ('\u{104c9}', 66801), - ('\u{104ca}', 66802), ('\u{104cb}', 66803), ('\u{104cc}', 66804), ('\u{104cd}', 66805), - ('\u{104ce}', 66806), ('\u{104cf}', 66807), ('\u{104d0}', 66808), ('\u{104d1}', 66809), - ('\u{104d2}', 66810), ('\u{104d3}', 66811), ('\u{10570}', 66967), 
('\u{10571}', 66968), - ('\u{10572}', 66969), ('\u{10573}', 66970), ('\u{10574}', 66971), ('\u{10575}', 66972), - ('\u{10576}', 66973), ('\u{10577}', 66974), ('\u{10578}', 66975), ('\u{10579}', 66976), - ('\u{1057a}', 66977), ('\u{1057c}', 66979), ('\u{1057d}', 66980), ('\u{1057e}', 66981), - ('\u{1057f}', 66982), ('\u{10580}', 66983), ('\u{10581}', 66984), ('\u{10582}', 66985), - ('\u{10583}', 66986), ('\u{10584}', 66987), ('\u{10585}', 66988), ('\u{10586}', 66989), - ('\u{10587}', 66990), ('\u{10588}', 66991), ('\u{10589}', 66992), ('\u{1058a}', 66993), - ('\u{1058c}', 66995), ('\u{1058d}', 66996), ('\u{1058e}', 66997), ('\u{1058f}', 66998), - ('\u{10590}', 66999), ('\u{10591}', 67000), ('\u{10592}', 67001), ('\u{10594}', 67003), - ('\u{10595}', 67004), ('\u{10c80}', 68800), ('\u{10c81}', 68801), ('\u{10c82}', 68802), - ('\u{10c83}', 68803), ('\u{10c84}', 68804), ('\u{10c85}', 68805), ('\u{10c86}', 68806), - ('\u{10c87}', 68807), ('\u{10c88}', 68808), ('\u{10c89}', 68809), ('\u{10c8a}', 68810), - ('\u{10c8b}', 68811), ('\u{10c8c}', 68812), ('\u{10c8d}', 68813), ('\u{10c8e}', 68814), - ('\u{10c8f}', 68815), ('\u{10c90}', 68816), ('\u{10c91}', 68817), ('\u{10c92}', 68818), - ('\u{10c93}', 68819), ('\u{10c94}', 68820), ('\u{10c95}', 68821), ('\u{10c96}', 68822), - ('\u{10c97}', 68823), ('\u{10c98}', 68824), ('\u{10c99}', 68825), ('\u{10c9a}', 68826), - ('\u{10c9b}', 68827), ('\u{10c9c}', 68828), ('\u{10c9d}', 68829), ('\u{10c9e}', 68830), - ('\u{10c9f}', 68831), ('\u{10ca0}', 68832), ('\u{10ca1}', 68833), ('\u{10ca2}', 68834), - ('\u{10ca3}', 68835), ('\u{10ca4}', 68836), ('\u{10ca5}', 68837), ('\u{10ca6}', 68838), - ('\u{10ca7}', 68839), ('\u{10ca8}', 68840), ('\u{10ca9}', 68841), ('\u{10caa}', 68842), - ('\u{10cab}', 68843), ('\u{10cac}', 68844), ('\u{10cad}', 68845), ('\u{10cae}', 68846), - ('\u{10caf}', 68847), ('\u{10cb0}', 68848), ('\u{10cb1}', 68849), ('\u{10cb2}', 68850), - ('\u{10d50}', 68976), ('\u{10d51}', 68977), ('\u{10d52}', 68978), ('\u{10d53}', 68979), - ('\u{10d54}', 68980), ('\u{10d55}', 68981), ('\u{10d56}', 68982), ('\u{10d57}', 68983), - ('\u{10d58}', 68984), ('\u{10d59}', 68985), ('\u{10d5a}', 68986), ('\u{10d5b}', 68987), - ('\u{10d5c}', 68988), ('\u{10d5d}', 68989), ('\u{10d5e}', 68990), ('\u{10d5f}', 68991), - ('\u{10d60}', 68992), ('\u{10d61}', 68993), ('\u{10d62}', 68994), ('\u{10d63}', 68995), - ('\u{10d64}', 68996), ('\u{10d65}', 68997), ('\u{118a0}', 71872), ('\u{118a1}', 71873), - ('\u{118a2}', 71874), ('\u{118a3}', 71875), ('\u{118a4}', 71876), ('\u{118a5}', 71877), - ('\u{118a6}', 71878), ('\u{118a7}', 71879), ('\u{118a8}', 71880), ('\u{118a9}', 71881), - ('\u{118aa}', 71882), ('\u{118ab}', 71883), ('\u{118ac}', 71884), ('\u{118ad}', 71885), - ('\u{118ae}', 71886), ('\u{118af}', 71887), ('\u{118b0}', 71888), ('\u{118b1}', 71889), - ('\u{118b2}', 71890), ('\u{118b3}', 71891), ('\u{118b4}', 71892), ('\u{118b5}', 71893), - ('\u{118b6}', 71894), ('\u{118b7}', 71895), ('\u{118b8}', 71896), ('\u{118b9}', 71897), - ('\u{118ba}', 71898), ('\u{118bb}', 71899), ('\u{118bc}', 71900), ('\u{118bd}', 71901), - ('\u{118be}', 71902), ('\u{118bf}', 71903), ('\u{16e40}', 93792), ('\u{16e41}', 93793), - ('\u{16e42}', 93794), ('\u{16e43}', 93795), ('\u{16e44}', 93796), ('\u{16e45}', 93797), - ('\u{16e46}', 93798), ('\u{16e47}', 93799), ('\u{16e48}', 93800), ('\u{16e49}', 93801), - ('\u{16e4a}', 93802), ('\u{16e4b}', 93803), ('\u{16e4c}', 93804), ('\u{16e4d}', 93805), - ('\u{16e4e}', 93806), ('\u{16e4f}', 93807), ('\u{16e50}', 93808), ('\u{16e51}', 93809), - ('\u{16e52}', 
93810), ('\u{16e53}', 93811), ('\u{16e54}', 93812), ('\u{16e55}', 93813), - ('\u{16e56}', 93814), ('\u{16e57}', 93815), ('\u{16e58}', 93816), ('\u{16e59}', 93817), - ('\u{16e5a}', 93818), ('\u{16e5b}', 93819), ('\u{16e5c}', 93820), ('\u{16e5d}', 93821), - ('\u{16e5e}', 93822), ('\u{16e5f}', 93823), ('\u{16ea0}', 93883), ('\u{16ea1}', 93884), - ('\u{16ea2}', 93885), ('\u{16ea3}', 93886), ('\u{16ea4}', 93887), ('\u{16ea5}', 93888), - ('\u{16ea6}', 93889), ('\u{16ea7}', 93890), ('\u{16ea8}', 93891), ('\u{16ea9}', 93892), - ('\u{16eaa}', 93893), ('\u{16eab}', 93894), ('\u{16eac}', 93895), ('\u{16ead}', 93896), - ('\u{16eae}', 93897), ('\u{16eaf}', 93898), ('\u{16eb0}', 93899), ('\u{16eb1}', 93900), - ('\u{16eb2}', 93901), ('\u{16eb3}', 93902), ('\u{16eb4}', 93903), ('\u{16eb5}', 93904), - ('\u{16eb6}', 93905), ('\u{16eb7}', 93906), ('\u{16eb8}', 93907), ('\u{1e900}', 125218), - ('\u{1e901}', 125219), ('\u{1e902}', 125220), ('\u{1e903}', 125221), ('\u{1e904}', 125222), - ('\u{1e905}', 125223), ('\u{1e906}', 125224), ('\u{1e907}', 125225), ('\u{1e908}', 125226), - ('\u{1e909}', 125227), ('\u{1e90a}', 125228), ('\u{1e90b}', 125229), ('\u{1e90c}', 125230), - ('\u{1e90d}', 125231), ('\u{1e90e}', 125232), ('\u{1e90f}', 125233), ('\u{1e910}', 125234), - ('\u{1e911}', 125235), ('\u{1e912}', 125236), ('\u{1e913}', 125237), ('\u{1e914}', 125238), - ('\u{1e915}', 125239), ('\u{1e916}', 125240), ('\u{1e917}', 125241), ('\u{1e918}', 125242), - ('\u{1e919}', 125243), ('\u{1e91a}', 125244), ('\u{1e91b}', 125245), ('\u{1e91c}', 125246), - ('\u{1e91d}', 125247), ('\u{1e91e}', 125248), ('\u{1e91f}', 125249), ('\u{1e920}', 125250), - ('\u{1e921}', 125251), + static UPPERCASE_TABLE: &[(char, u32); 1554] = &[ + ('\u{b5}', 0x39c), ('\u{df}', 0x400000), ('\u{e0}', 0xc0), ('\u{e1}', 0xc1), + ('\u{e2}', 0xc2), ('\u{e3}', 0xc3), ('\u{e4}', 0xc4), ('\u{e5}', 0xc5), ('\u{e6}', 0xc6), + ('\u{e7}', 0xc7), ('\u{e8}', 0xc8), ('\u{e9}', 0xc9), ('\u{ea}', 0xca), ('\u{eb}', 0xcb), + ('\u{ec}', 0xcc), ('\u{ed}', 0xcd), ('\u{ee}', 0xce), ('\u{ef}', 0xcf), ('\u{f0}', 0xd0), + ('\u{f1}', 0xd1), ('\u{f2}', 0xd2), ('\u{f3}', 0xd3), ('\u{f4}', 0xd4), ('\u{f5}', 0xd5), + ('\u{f6}', 0xd6), ('\u{f8}', 0xd8), ('\u{f9}', 0xd9), ('\u{fa}', 0xda), ('\u{fb}', 0xdb), + ('\u{fc}', 0xdc), ('\u{fd}', 0xdd), ('\u{fe}', 0xde), ('\u{ff}', 0x178), ('\u{101}', 0x100), + ('\u{103}', 0x102), ('\u{105}', 0x104), ('\u{107}', 0x106), ('\u{109}', 0x108), + ('\u{10b}', 0x10a), ('\u{10d}', 0x10c), ('\u{10f}', 0x10e), ('\u{111}', 0x110), + ('\u{113}', 0x112), ('\u{115}', 0x114), ('\u{117}', 0x116), ('\u{119}', 0x118), + ('\u{11b}', 0x11a), ('\u{11d}', 0x11c), ('\u{11f}', 0x11e), ('\u{121}', 0x120), + ('\u{123}', 0x122), ('\u{125}', 0x124), ('\u{127}', 0x126), ('\u{129}', 0x128), + ('\u{12b}', 0x12a), ('\u{12d}', 0x12c), ('\u{12f}', 0x12e), ('\u{131}', 0x49), + ('\u{133}', 0x132), ('\u{135}', 0x134), ('\u{137}', 0x136), ('\u{13a}', 0x139), + ('\u{13c}', 0x13b), ('\u{13e}', 0x13d), ('\u{140}', 0x13f), ('\u{142}', 0x141), + ('\u{144}', 0x143), ('\u{146}', 0x145), ('\u{148}', 0x147), ('\u{149}', 0x400001), + ('\u{14b}', 0x14a), ('\u{14d}', 0x14c), ('\u{14f}', 0x14e), ('\u{151}', 0x150), + ('\u{153}', 0x152), ('\u{155}', 0x154), ('\u{157}', 0x156), ('\u{159}', 0x158), + ('\u{15b}', 0x15a), ('\u{15d}', 0x15c), ('\u{15f}', 0x15e), ('\u{161}', 0x160), + ('\u{163}', 0x162), ('\u{165}', 0x164), ('\u{167}', 0x166), ('\u{169}', 0x168), + ('\u{16b}', 0x16a), ('\u{16d}', 0x16c), ('\u{16f}', 0x16e), ('\u{171}', 0x170), + ('\u{173}', 0x172), ('\u{175}', 0x174), ('\u{177}', 
0x176), ('\u{17a}', 0x179), + ('\u{17c}', 0x17b), ('\u{17e}', 0x17d), ('\u{17f}', 0x53), ('\u{180}', 0x243), + ('\u{183}', 0x182), ('\u{185}', 0x184), ('\u{188}', 0x187), ('\u{18c}', 0x18b), + ('\u{192}', 0x191), ('\u{195}', 0x1f6), ('\u{199}', 0x198), ('\u{19a}', 0x23d), + ('\u{19b}', 0xa7dc), ('\u{19e}', 0x220), ('\u{1a1}', 0x1a0), ('\u{1a3}', 0x1a2), + ('\u{1a5}', 0x1a4), ('\u{1a8}', 0x1a7), ('\u{1ad}', 0x1ac), ('\u{1b0}', 0x1af), + ('\u{1b4}', 0x1b3), ('\u{1b6}', 0x1b5), ('\u{1b9}', 0x1b8), ('\u{1bd}', 0x1bc), + ('\u{1bf}', 0x1f7), ('\u{1c5}', 0x1c4), ('\u{1c6}', 0x1c4), ('\u{1c8}', 0x1c7), + ('\u{1c9}', 0x1c7), ('\u{1cb}', 0x1ca), ('\u{1cc}', 0x1ca), ('\u{1ce}', 0x1cd), + ('\u{1d0}', 0x1cf), ('\u{1d2}', 0x1d1), ('\u{1d4}', 0x1d3), ('\u{1d6}', 0x1d5), + ('\u{1d8}', 0x1d7), ('\u{1da}', 0x1d9), ('\u{1dc}', 0x1db), ('\u{1dd}', 0x18e), + ('\u{1df}', 0x1de), ('\u{1e1}', 0x1e0), ('\u{1e3}', 0x1e2), ('\u{1e5}', 0x1e4), + ('\u{1e7}', 0x1e6), ('\u{1e9}', 0x1e8), ('\u{1eb}', 0x1ea), ('\u{1ed}', 0x1ec), + ('\u{1ef}', 0x1ee), ('\u{1f0}', 0x400002), ('\u{1f2}', 0x1f1), ('\u{1f3}', 0x1f1), + ('\u{1f5}', 0x1f4), ('\u{1f9}', 0x1f8), ('\u{1fb}', 0x1fa), ('\u{1fd}', 0x1fc), + ('\u{1ff}', 0x1fe), ('\u{201}', 0x200), ('\u{203}', 0x202), ('\u{205}', 0x204), + ('\u{207}', 0x206), ('\u{209}', 0x208), ('\u{20b}', 0x20a), ('\u{20d}', 0x20c), + ('\u{20f}', 0x20e), ('\u{211}', 0x210), ('\u{213}', 0x212), ('\u{215}', 0x214), + ('\u{217}', 0x216), ('\u{219}', 0x218), ('\u{21b}', 0x21a), ('\u{21d}', 0x21c), + ('\u{21f}', 0x21e), ('\u{223}', 0x222), ('\u{225}', 0x224), ('\u{227}', 0x226), + ('\u{229}', 0x228), ('\u{22b}', 0x22a), ('\u{22d}', 0x22c), ('\u{22f}', 0x22e), + ('\u{231}', 0x230), ('\u{233}', 0x232), ('\u{23c}', 0x23b), ('\u{23f}', 0x2c7e), + ('\u{240}', 0x2c7f), ('\u{242}', 0x241), ('\u{247}', 0x246), ('\u{249}', 0x248), + ('\u{24b}', 0x24a), ('\u{24d}', 0x24c), ('\u{24f}', 0x24e), ('\u{250}', 0x2c6f), + ('\u{251}', 0x2c6d), ('\u{252}', 0x2c70), ('\u{253}', 0x181), ('\u{254}', 0x186), + ('\u{256}', 0x189), ('\u{257}', 0x18a), ('\u{259}', 0x18f), ('\u{25b}', 0x190), + ('\u{25c}', 0xa7ab), ('\u{260}', 0x193), ('\u{261}', 0xa7ac), ('\u{263}', 0x194), + ('\u{264}', 0xa7cb), ('\u{265}', 0xa78d), ('\u{266}', 0xa7aa), ('\u{268}', 0x197), + ('\u{269}', 0x196), ('\u{26a}', 0xa7ae), ('\u{26b}', 0x2c62), ('\u{26c}', 0xa7ad), + ('\u{26f}', 0x19c), ('\u{271}', 0x2c6e), ('\u{272}', 0x19d), ('\u{275}', 0x19f), + ('\u{27d}', 0x2c64), ('\u{280}', 0x1a6), ('\u{282}', 0xa7c5), ('\u{283}', 0x1a9), + ('\u{287}', 0xa7b1), ('\u{288}', 0x1ae), ('\u{289}', 0x244), ('\u{28a}', 0x1b1), + ('\u{28b}', 0x1b2), ('\u{28c}', 0x245), ('\u{292}', 0x1b7), ('\u{29d}', 0xa7b2), + ('\u{29e}', 0xa7b0), ('\u{345}', 0x399), ('\u{371}', 0x370), ('\u{373}', 0x372), + ('\u{377}', 0x376), ('\u{37b}', 0x3fd), ('\u{37c}', 0x3fe), ('\u{37d}', 0x3ff), + ('\u{390}', 0x400003), ('\u{3ac}', 0x386), ('\u{3ad}', 0x388), ('\u{3ae}', 0x389), + ('\u{3af}', 0x38a), ('\u{3b0}', 0x400004), ('\u{3b1}', 0x391), ('\u{3b2}', 0x392), + ('\u{3b3}', 0x393), ('\u{3b4}', 0x394), ('\u{3b5}', 0x395), ('\u{3b6}', 0x396), + ('\u{3b7}', 0x397), ('\u{3b8}', 0x398), ('\u{3b9}', 0x399), ('\u{3ba}', 0x39a), + ('\u{3bb}', 0x39b), ('\u{3bc}', 0x39c), ('\u{3bd}', 0x39d), ('\u{3be}', 0x39e), + ('\u{3bf}', 0x39f), ('\u{3c0}', 0x3a0), ('\u{3c1}', 0x3a1), ('\u{3c2}', 0x3a3), + ('\u{3c3}', 0x3a3), ('\u{3c4}', 0x3a4), ('\u{3c5}', 0x3a5), ('\u{3c6}', 0x3a6), + ('\u{3c7}', 0x3a7), ('\u{3c8}', 0x3a8), ('\u{3c9}', 0x3a9), ('\u{3ca}', 0x3aa), + ('\u{3cb}', 0x3ab), ('\u{3cc}', 0x38c), ('\u{3cd}', 
0x38e), ('\u{3ce}', 0x38f), + ('\u{3d0}', 0x392), ('\u{3d1}', 0x398), ('\u{3d5}', 0x3a6), ('\u{3d6}', 0x3a0), + ('\u{3d7}', 0x3cf), ('\u{3d9}', 0x3d8), ('\u{3db}', 0x3da), ('\u{3dd}', 0x3dc), + ('\u{3df}', 0x3de), ('\u{3e1}', 0x3e0), ('\u{3e3}', 0x3e2), ('\u{3e5}', 0x3e4), + ('\u{3e7}', 0x3e6), ('\u{3e9}', 0x3e8), ('\u{3eb}', 0x3ea), ('\u{3ed}', 0x3ec), + ('\u{3ef}', 0x3ee), ('\u{3f0}', 0x39a), ('\u{3f1}', 0x3a1), ('\u{3f2}', 0x3f9), + ('\u{3f3}', 0x37f), ('\u{3f5}', 0x395), ('\u{3f8}', 0x3f7), ('\u{3fb}', 0x3fa), + ('\u{430}', 0x410), ('\u{431}', 0x411), ('\u{432}', 0x412), ('\u{433}', 0x413), + ('\u{434}', 0x414), ('\u{435}', 0x415), ('\u{436}', 0x416), ('\u{437}', 0x417), + ('\u{438}', 0x418), ('\u{439}', 0x419), ('\u{43a}', 0x41a), ('\u{43b}', 0x41b), + ('\u{43c}', 0x41c), ('\u{43d}', 0x41d), ('\u{43e}', 0x41e), ('\u{43f}', 0x41f), + ('\u{440}', 0x420), ('\u{441}', 0x421), ('\u{442}', 0x422), ('\u{443}', 0x423), + ('\u{444}', 0x424), ('\u{445}', 0x425), ('\u{446}', 0x426), ('\u{447}', 0x427), + ('\u{448}', 0x428), ('\u{449}', 0x429), ('\u{44a}', 0x42a), ('\u{44b}', 0x42b), + ('\u{44c}', 0x42c), ('\u{44d}', 0x42d), ('\u{44e}', 0x42e), ('\u{44f}', 0x42f), + ('\u{450}', 0x400), ('\u{451}', 0x401), ('\u{452}', 0x402), ('\u{453}', 0x403), + ('\u{454}', 0x404), ('\u{455}', 0x405), ('\u{456}', 0x406), ('\u{457}', 0x407), + ('\u{458}', 0x408), ('\u{459}', 0x409), ('\u{45a}', 0x40a), ('\u{45b}', 0x40b), + ('\u{45c}', 0x40c), ('\u{45d}', 0x40d), ('\u{45e}', 0x40e), ('\u{45f}', 0x40f), + ('\u{461}', 0x460), ('\u{463}', 0x462), ('\u{465}', 0x464), ('\u{467}', 0x466), + ('\u{469}', 0x468), ('\u{46b}', 0x46a), ('\u{46d}', 0x46c), ('\u{46f}', 0x46e), + ('\u{471}', 0x470), ('\u{473}', 0x472), ('\u{475}', 0x474), ('\u{477}', 0x476), + ('\u{479}', 0x478), ('\u{47b}', 0x47a), ('\u{47d}', 0x47c), ('\u{47f}', 0x47e), + ('\u{481}', 0x480), ('\u{48b}', 0x48a), ('\u{48d}', 0x48c), ('\u{48f}', 0x48e), + ('\u{491}', 0x490), ('\u{493}', 0x492), ('\u{495}', 0x494), ('\u{497}', 0x496), + ('\u{499}', 0x498), ('\u{49b}', 0x49a), ('\u{49d}', 0x49c), ('\u{49f}', 0x49e), + ('\u{4a1}', 0x4a0), ('\u{4a3}', 0x4a2), ('\u{4a5}', 0x4a4), ('\u{4a7}', 0x4a6), + ('\u{4a9}', 0x4a8), ('\u{4ab}', 0x4aa), ('\u{4ad}', 0x4ac), ('\u{4af}', 0x4ae), + ('\u{4b1}', 0x4b0), ('\u{4b3}', 0x4b2), ('\u{4b5}', 0x4b4), ('\u{4b7}', 0x4b6), + ('\u{4b9}', 0x4b8), ('\u{4bb}', 0x4ba), ('\u{4bd}', 0x4bc), ('\u{4bf}', 0x4be), + ('\u{4c2}', 0x4c1), ('\u{4c4}', 0x4c3), ('\u{4c6}', 0x4c5), ('\u{4c8}', 0x4c7), + ('\u{4ca}', 0x4c9), ('\u{4cc}', 0x4cb), ('\u{4ce}', 0x4cd), ('\u{4cf}', 0x4c0), + ('\u{4d1}', 0x4d0), ('\u{4d3}', 0x4d2), ('\u{4d5}', 0x4d4), ('\u{4d7}', 0x4d6), + ('\u{4d9}', 0x4d8), ('\u{4db}', 0x4da), ('\u{4dd}', 0x4dc), ('\u{4df}', 0x4de), + ('\u{4e1}', 0x4e0), ('\u{4e3}', 0x4e2), ('\u{4e5}', 0x4e4), ('\u{4e7}', 0x4e6), + ('\u{4e9}', 0x4e8), ('\u{4eb}', 0x4ea), ('\u{4ed}', 0x4ec), ('\u{4ef}', 0x4ee), + ('\u{4f1}', 0x4f0), ('\u{4f3}', 0x4f2), ('\u{4f5}', 0x4f4), ('\u{4f7}', 0x4f6), + ('\u{4f9}', 0x4f8), ('\u{4fb}', 0x4fa), ('\u{4fd}', 0x4fc), ('\u{4ff}', 0x4fe), + ('\u{501}', 0x500), ('\u{503}', 0x502), ('\u{505}', 0x504), ('\u{507}', 0x506), + ('\u{509}', 0x508), ('\u{50b}', 0x50a), ('\u{50d}', 0x50c), ('\u{50f}', 0x50e), + ('\u{511}', 0x510), ('\u{513}', 0x512), ('\u{515}', 0x514), ('\u{517}', 0x516), + ('\u{519}', 0x518), ('\u{51b}', 0x51a), ('\u{51d}', 0x51c), ('\u{51f}', 0x51e), + ('\u{521}', 0x520), ('\u{523}', 0x522), ('\u{525}', 0x524), ('\u{527}', 0x526), + ('\u{529}', 0x528), ('\u{52b}', 0x52a), ('\u{52d}', 0x52c), ('\u{52f}', 0x52e), 
+ ('\u{561}', 0x531), ('\u{562}', 0x532), ('\u{563}', 0x533), ('\u{564}', 0x534), + ('\u{565}', 0x535), ('\u{566}', 0x536), ('\u{567}', 0x537), ('\u{568}', 0x538), + ('\u{569}', 0x539), ('\u{56a}', 0x53a), ('\u{56b}', 0x53b), ('\u{56c}', 0x53c), + ('\u{56d}', 0x53d), ('\u{56e}', 0x53e), ('\u{56f}', 0x53f), ('\u{570}', 0x540), + ('\u{571}', 0x541), ('\u{572}', 0x542), ('\u{573}', 0x543), ('\u{574}', 0x544), + ('\u{575}', 0x545), ('\u{576}', 0x546), ('\u{577}', 0x547), ('\u{578}', 0x548), + ('\u{579}', 0x549), ('\u{57a}', 0x54a), ('\u{57b}', 0x54b), ('\u{57c}', 0x54c), + ('\u{57d}', 0x54d), ('\u{57e}', 0x54e), ('\u{57f}', 0x54f), ('\u{580}', 0x550), + ('\u{581}', 0x551), ('\u{582}', 0x552), ('\u{583}', 0x553), ('\u{584}', 0x554), + ('\u{585}', 0x555), ('\u{586}', 0x556), ('\u{587}', 0x400005), ('\u{10d0}', 0x1c90), + ('\u{10d1}', 0x1c91), ('\u{10d2}', 0x1c92), ('\u{10d3}', 0x1c93), ('\u{10d4}', 0x1c94), + ('\u{10d5}', 0x1c95), ('\u{10d6}', 0x1c96), ('\u{10d7}', 0x1c97), ('\u{10d8}', 0x1c98), + ('\u{10d9}', 0x1c99), ('\u{10da}', 0x1c9a), ('\u{10db}', 0x1c9b), ('\u{10dc}', 0x1c9c), + ('\u{10dd}', 0x1c9d), ('\u{10de}', 0x1c9e), ('\u{10df}', 0x1c9f), ('\u{10e0}', 0x1ca0), + ('\u{10e1}', 0x1ca1), ('\u{10e2}', 0x1ca2), ('\u{10e3}', 0x1ca3), ('\u{10e4}', 0x1ca4), + ('\u{10e5}', 0x1ca5), ('\u{10e6}', 0x1ca6), ('\u{10e7}', 0x1ca7), ('\u{10e8}', 0x1ca8), + ('\u{10e9}', 0x1ca9), ('\u{10ea}', 0x1caa), ('\u{10eb}', 0x1cab), ('\u{10ec}', 0x1cac), + ('\u{10ed}', 0x1cad), ('\u{10ee}', 0x1cae), ('\u{10ef}', 0x1caf), ('\u{10f0}', 0x1cb0), + ('\u{10f1}', 0x1cb1), ('\u{10f2}', 0x1cb2), ('\u{10f3}', 0x1cb3), ('\u{10f4}', 0x1cb4), + ('\u{10f5}', 0x1cb5), ('\u{10f6}', 0x1cb6), ('\u{10f7}', 0x1cb7), ('\u{10f8}', 0x1cb8), + ('\u{10f9}', 0x1cb9), ('\u{10fa}', 0x1cba), ('\u{10fd}', 0x1cbd), ('\u{10fe}', 0x1cbe), + ('\u{10ff}', 0x1cbf), ('\u{13f8}', 0x13f0), ('\u{13f9}', 0x13f1), ('\u{13fa}', 0x13f2), + ('\u{13fb}', 0x13f3), ('\u{13fc}', 0x13f4), ('\u{13fd}', 0x13f5), ('\u{1c80}', 0x412), + ('\u{1c81}', 0x414), ('\u{1c82}', 0x41e), ('\u{1c83}', 0x421), ('\u{1c84}', 0x422), + ('\u{1c85}', 0x422), ('\u{1c86}', 0x42a), ('\u{1c87}', 0x462), ('\u{1c88}', 0xa64a), + ('\u{1c8a}', 0x1c89), ('\u{1d79}', 0xa77d), ('\u{1d7d}', 0x2c63), ('\u{1d8e}', 0xa7c6), + ('\u{1e01}', 0x1e00), ('\u{1e03}', 0x1e02), ('\u{1e05}', 0x1e04), ('\u{1e07}', 0x1e06), + ('\u{1e09}', 0x1e08), ('\u{1e0b}', 0x1e0a), ('\u{1e0d}', 0x1e0c), ('\u{1e0f}', 0x1e0e), + ('\u{1e11}', 0x1e10), ('\u{1e13}', 0x1e12), ('\u{1e15}', 0x1e14), ('\u{1e17}', 0x1e16), + ('\u{1e19}', 0x1e18), ('\u{1e1b}', 0x1e1a), ('\u{1e1d}', 0x1e1c), ('\u{1e1f}', 0x1e1e), + ('\u{1e21}', 0x1e20), ('\u{1e23}', 0x1e22), ('\u{1e25}', 0x1e24), ('\u{1e27}', 0x1e26), + ('\u{1e29}', 0x1e28), ('\u{1e2b}', 0x1e2a), ('\u{1e2d}', 0x1e2c), ('\u{1e2f}', 0x1e2e), + ('\u{1e31}', 0x1e30), ('\u{1e33}', 0x1e32), ('\u{1e35}', 0x1e34), ('\u{1e37}', 0x1e36), + ('\u{1e39}', 0x1e38), ('\u{1e3b}', 0x1e3a), ('\u{1e3d}', 0x1e3c), ('\u{1e3f}', 0x1e3e), + ('\u{1e41}', 0x1e40), ('\u{1e43}', 0x1e42), ('\u{1e45}', 0x1e44), ('\u{1e47}', 0x1e46), + ('\u{1e49}', 0x1e48), ('\u{1e4b}', 0x1e4a), ('\u{1e4d}', 0x1e4c), ('\u{1e4f}', 0x1e4e), + ('\u{1e51}', 0x1e50), ('\u{1e53}', 0x1e52), ('\u{1e55}', 0x1e54), ('\u{1e57}', 0x1e56), + ('\u{1e59}', 0x1e58), ('\u{1e5b}', 0x1e5a), ('\u{1e5d}', 0x1e5c), ('\u{1e5f}', 0x1e5e), + ('\u{1e61}', 0x1e60), ('\u{1e63}', 0x1e62), ('\u{1e65}', 0x1e64), ('\u{1e67}', 0x1e66), + ('\u{1e69}', 0x1e68), ('\u{1e6b}', 0x1e6a), ('\u{1e6d}', 0x1e6c), ('\u{1e6f}', 0x1e6e), + ('\u{1e71}', 0x1e70), ('\u{1e73}', 
0x1e72), ('\u{1e75}', 0x1e74), ('\u{1e77}', 0x1e76), + ('\u{1e79}', 0x1e78), ('\u{1e7b}', 0x1e7a), ('\u{1e7d}', 0x1e7c), ('\u{1e7f}', 0x1e7e), + ('\u{1e81}', 0x1e80), ('\u{1e83}', 0x1e82), ('\u{1e85}', 0x1e84), ('\u{1e87}', 0x1e86), + ('\u{1e89}', 0x1e88), ('\u{1e8b}', 0x1e8a), ('\u{1e8d}', 0x1e8c), ('\u{1e8f}', 0x1e8e), + ('\u{1e91}', 0x1e90), ('\u{1e93}', 0x1e92), ('\u{1e95}', 0x1e94), ('\u{1e96}', 0x400006), + ('\u{1e97}', 0x400007), ('\u{1e98}', 0x400008), ('\u{1e99}', 0x400009), + ('\u{1e9a}', 0x40000a), ('\u{1e9b}', 0x1e60), ('\u{1ea1}', 0x1ea0), ('\u{1ea3}', 0x1ea2), + ('\u{1ea5}', 0x1ea4), ('\u{1ea7}', 0x1ea6), ('\u{1ea9}', 0x1ea8), ('\u{1eab}', 0x1eaa), + ('\u{1ead}', 0x1eac), ('\u{1eaf}', 0x1eae), ('\u{1eb1}', 0x1eb0), ('\u{1eb3}', 0x1eb2), + ('\u{1eb5}', 0x1eb4), ('\u{1eb7}', 0x1eb6), ('\u{1eb9}', 0x1eb8), ('\u{1ebb}', 0x1eba), + ('\u{1ebd}', 0x1ebc), ('\u{1ebf}', 0x1ebe), ('\u{1ec1}', 0x1ec0), ('\u{1ec3}', 0x1ec2), + ('\u{1ec5}', 0x1ec4), ('\u{1ec7}', 0x1ec6), ('\u{1ec9}', 0x1ec8), ('\u{1ecb}', 0x1eca), + ('\u{1ecd}', 0x1ecc), ('\u{1ecf}', 0x1ece), ('\u{1ed1}', 0x1ed0), ('\u{1ed3}', 0x1ed2), + ('\u{1ed5}', 0x1ed4), ('\u{1ed7}', 0x1ed6), ('\u{1ed9}', 0x1ed8), ('\u{1edb}', 0x1eda), + ('\u{1edd}', 0x1edc), ('\u{1edf}', 0x1ede), ('\u{1ee1}', 0x1ee0), ('\u{1ee3}', 0x1ee2), + ('\u{1ee5}', 0x1ee4), ('\u{1ee7}', 0x1ee6), ('\u{1ee9}', 0x1ee8), ('\u{1eeb}', 0x1eea), + ('\u{1eed}', 0x1eec), ('\u{1eef}', 0x1eee), ('\u{1ef1}', 0x1ef0), ('\u{1ef3}', 0x1ef2), + ('\u{1ef5}', 0x1ef4), ('\u{1ef7}', 0x1ef6), ('\u{1ef9}', 0x1ef8), ('\u{1efb}', 0x1efa), + ('\u{1efd}', 0x1efc), ('\u{1eff}', 0x1efe), ('\u{1f00}', 0x1f08), ('\u{1f01}', 0x1f09), + ('\u{1f02}', 0x1f0a), ('\u{1f03}', 0x1f0b), ('\u{1f04}', 0x1f0c), ('\u{1f05}', 0x1f0d), + ('\u{1f06}', 0x1f0e), ('\u{1f07}', 0x1f0f), ('\u{1f10}', 0x1f18), ('\u{1f11}', 0x1f19), + ('\u{1f12}', 0x1f1a), ('\u{1f13}', 0x1f1b), ('\u{1f14}', 0x1f1c), ('\u{1f15}', 0x1f1d), + ('\u{1f20}', 0x1f28), ('\u{1f21}', 0x1f29), ('\u{1f22}', 0x1f2a), ('\u{1f23}', 0x1f2b), + ('\u{1f24}', 0x1f2c), ('\u{1f25}', 0x1f2d), ('\u{1f26}', 0x1f2e), ('\u{1f27}', 0x1f2f), + ('\u{1f30}', 0x1f38), ('\u{1f31}', 0x1f39), ('\u{1f32}', 0x1f3a), ('\u{1f33}', 0x1f3b), + ('\u{1f34}', 0x1f3c), ('\u{1f35}', 0x1f3d), ('\u{1f36}', 0x1f3e), ('\u{1f37}', 0x1f3f), + ('\u{1f40}', 0x1f48), ('\u{1f41}', 0x1f49), ('\u{1f42}', 0x1f4a), ('\u{1f43}', 0x1f4b), + ('\u{1f44}', 0x1f4c), ('\u{1f45}', 0x1f4d), ('\u{1f50}', 0x40000b), ('\u{1f51}', 0x1f59), + ('\u{1f52}', 0x40000c), ('\u{1f53}', 0x1f5b), ('\u{1f54}', 0x40000d), ('\u{1f55}', 0x1f5d), + ('\u{1f56}', 0x40000e), ('\u{1f57}', 0x1f5f), ('\u{1f60}', 0x1f68), ('\u{1f61}', 0x1f69), + ('\u{1f62}', 0x1f6a), ('\u{1f63}', 0x1f6b), ('\u{1f64}', 0x1f6c), ('\u{1f65}', 0x1f6d), + ('\u{1f66}', 0x1f6e), ('\u{1f67}', 0x1f6f), ('\u{1f70}', 0x1fba), ('\u{1f71}', 0x1fbb), + ('\u{1f72}', 0x1fc8), ('\u{1f73}', 0x1fc9), ('\u{1f74}', 0x1fca), ('\u{1f75}', 0x1fcb), + ('\u{1f76}', 0x1fda), ('\u{1f77}', 0x1fdb), ('\u{1f78}', 0x1ff8), ('\u{1f79}', 0x1ff9), + ('\u{1f7a}', 0x1fea), ('\u{1f7b}', 0x1feb), ('\u{1f7c}', 0x1ffa), ('\u{1f7d}', 0x1ffb), + ('\u{1f80}', 0x40000f), ('\u{1f81}', 0x400010), ('\u{1f82}', 0x400011), + ('\u{1f83}', 0x400012), ('\u{1f84}', 0x400013), ('\u{1f85}', 0x400014), + ('\u{1f86}', 0x400015), ('\u{1f87}', 0x400016), ('\u{1f88}', 0x400017), + ('\u{1f89}', 0x400018), ('\u{1f8a}', 0x400019), ('\u{1f8b}', 0x40001a), + ('\u{1f8c}', 0x40001b), ('\u{1f8d}', 0x40001c), ('\u{1f8e}', 0x40001d), + ('\u{1f8f}', 0x40001e), ('\u{1f90}', 0x40001f), ('\u{1f91}', 0x400020), + 
('\u{1f92}', 0x400021), ('\u{1f93}', 0x400022), ('\u{1f94}', 0x400023), + ('\u{1f95}', 0x400024), ('\u{1f96}', 0x400025), ('\u{1f97}', 0x400026), + ('\u{1f98}', 0x400027), ('\u{1f99}', 0x400028), ('\u{1f9a}', 0x400029), + ('\u{1f9b}', 0x40002a), ('\u{1f9c}', 0x40002b), ('\u{1f9d}', 0x40002c), + ('\u{1f9e}', 0x40002d), ('\u{1f9f}', 0x40002e), ('\u{1fa0}', 0x40002f), + ('\u{1fa1}', 0x400030), ('\u{1fa2}', 0x400031), ('\u{1fa3}', 0x400032), + ('\u{1fa4}', 0x400033), ('\u{1fa5}', 0x400034), ('\u{1fa6}', 0x400035), + ('\u{1fa7}', 0x400036), ('\u{1fa8}', 0x400037), ('\u{1fa9}', 0x400038), + ('\u{1faa}', 0x400039), ('\u{1fab}', 0x40003a), ('\u{1fac}', 0x40003b), + ('\u{1fad}', 0x40003c), ('\u{1fae}', 0x40003d), ('\u{1faf}', 0x40003e), + ('\u{1fb0}', 0x1fb8), ('\u{1fb1}', 0x1fb9), ('\u{1fb2}', 0x40003f), ('\u{1fb3}', 0x400040), + ('\u{1fb4}', 0x400041), ('\u{1fb6}', 0x400042), ('\u{1fb7}', 0x400043), + ('\u{1fbc}', 0x400044), ('\u{1fbe}', 0x399), ('\u{1fc2}', 0x400045), ('\u{1fc3}', 0x400046), + ('\u{1fc4}', 0x400047), ('\u{1fc6}', 0x400048), ('\u{1fc7}', 0x400049), + ('\u{1fcc}', 0x40004a), ('\u{1fd0}', 0x1fd8), ('\u{1fd1}', 0x1fd9), ('\u{1fd2}', 0x40004b), + ('\u{1fd3}', 0x40004c), ('\u{1fd6}', 0x40004d), ('\u{1fd7}', 0x40004e), + ('\u{1fe0}', 0x1fe8), ('\u{1fe1}', 0x1fe9), ('\u{1fe2}', 0x40004f), ('\u{1fe3}', 0x400050), + ('\u{1fe4}', 0x400051), ('\u{1fe5}', 0x1fec), ('\u{1fe6}', 0x400052), + ('\u{1fe7}', 0x400053), ('\u{1ff2}', 0x400054), ('\u{1ff3}', 0x400055), + ('\u{1ff4}', 0x400056), ('\u{1ff6}', 0x400057), ('\u{1ff7}', 0x400058), + ('\u{1ffc}', 0x400059), ('\u{214e}', 0x2132), ('\u{2170}', 0x2160), ('\u{2171}', 0x2161), + ('\u{2172}', 0x2162), ('\u{2173}', 0x2163), ('\u{2174}', 0x2164), ('\u{2175}', 0x2165), + ('\u{2176}', 0x2166), ('\u{2177}', 0x2167), ('\u{2178}', 0x2168), ('\u{2179}', 0x2169), + ('\u{217a}', 0x216a), ('\u{217b}', 0x216b), ('\u{217c}', 0x216c), ('\u{217d}', 0x216d), + ('\u{217e}', 0x216e), ('\u{217f}', 0x216f), ('\u{2184}', 0x2183), ('\u{24d0}', 0x24b6), + ('\u{24d1}', 0x24b7), ('\u{24d2}', 0x24b8), ('\u{24d3}', 0x24b9), ('\u{24d4}', 0x24ba), + ('\u{24d5}', 0x24bb), ('\u{24d6}', 0x24bc), ('\u{24d7}', 0x24bd), ('\u{24d8}', 0x24be), + ('\u{24d9}', 0x24bf), ('\u{24da}', 0x24c0), ('\u{24db}', 0x24c1), ('\u{24dc}', 0x24c2), + ('\u{24dd}', 0x24c3), ('\u{24de}', 0x24c4), ('\u{24df}', 0x24c5), ('\u{24e0}', 0x24c6), + ('\u{24e1}', 0x24c7), ('\u{24e2}', 0x24c8), ('\u{24e3}', 0x24c9), ('\u{24e4}', 0x24ca), + ('\u{24e5}', 0x24cb), ('\u{24e6}', 0x24cc), ('\u{24e7}', 0x24cd), ('\u{24e8}', 0x24ce), + ('\u{24e9}', 0x24cf), ('\u{2c30}', 0x2c00), ('\u{2c31}', 0x2c01), ('\u{2c32}', 0x2c02), + ('\u{2c33}', 0x2c03), ('\u{2c34}', 0x2c04), ('\u{2c35}', 0x2c05), ('\u{2c36}', 0x2c06), + ('\u{2c37}', 0x2c07), ('\u{2c38}', 0x2c08), ('\u{2c39}', 0x2c09), ('\u{2c3a}', 0x2c0a), + ('\u{2c3b}', 0x2c0b), ('\u{2c3c}', 0x2c0c), ('\u{2c3d}', 0x2c0d), ('\u{2c3e}', 0x2c0e), + ('\u{2c3f}', 0x2c0f), ('\u{2c40}', 0x2c10), ('\u{2c41}', 0x2c11), ('\u{2c42}', 0x2c12), + ('\u{2c43}', 0x2c13), ('\u{2c44}', 0x2c14), ('\u{2c45}', 0x2c15), ('\u{2c46}', 0x2c16), + ('\u{2c47}', 0x2c17), ('\u{2c48}', 0x2c18), ('\u{2c49}', 0x2c19), ('\u{2c4a}', 0x2c1a), + ('\u{2c4b}', 0x2c1b), ('\u{2c4c}', 0x2c1c), ('\u{2c4d}', 0x2c1d), ('\u{2c4e}', 0x2c1e), + ('\u{2c4f}', 0x2c1f), ('\u{2c50}', 0x2c20), ('\u{2c51}', 0x2c21), ('\u{2c52}', 0x2c22), + ('\u{2c53}', 0x2c23), ('\u{2c54}', 0x2c24), ('\u{2c55}', 0x2c25), ('\u{2c56}', 0x2c26), + ('\u{2c57}', 0x2c27), ('\u{2c58}', 0x2c28), ('\u{2c59}', 0x2c29), ('\u{2c5a}', 0x2c2a), + ('\u{2c5b}', 
0x2c2b), ('\u{2c5c}', 0x2c2c), ('\u{2c5d}', 0x2c2d), ('\u{2c5e}', 0x2c2e), + ('\u{2c5f}', 0x2c2f), ('\u{2c61}', 0x2c60), ('\u{2c65}', 0x23a), ('\u{2c66}', 0x23e), + ('\u{2c68}', 0x2c67), ('\u{2c6a}', 0x2c69), ('\u{2c6c}', 0x2c6b), ('\u{2c73}', 0x2c72), + ('\u{2c76}', 0x2c75), ('\u{2c81}', 0x2c80), ('\u{2c83}', 0x2c82), ('\u{2c85}', 0x2c84), + ('\u{2c87}', 0x2c86), ('\u{2c89}', 0x2c88), ('\u{2c8b}', 0x2c8a), ('\u{2c8d}', 0x2c8c), + ('\u{2c8f}', 0x2c8e), ('\u{2c91}', 0x2c90), ('\u{2c93}', 0x2c92), ('\u{2c95}', 0x2c94), + ('\u{2c97}', 0x2c96), ('\u{2c99}', 0x2c98), ('\u{2c9b}', 0x2c9a), ('\u{2c9d}', 0x2c9c), + ('\u{2c9f}', 0x2c9e), ('\u{2ca1}', 0x2ca0), ('\u{2ca3}', 0x2ca2), ('\u{2ca5}', 0x2ca4), + ('\u{2ca7}', 0x2ca6), ('\u{2ca9}', 0x2ca8), ('\u{2cab}', 0x2caa), ('\u{2cad}', 0x2cac), + ('\u{2caf}', 0x2cae), ('\u{2cb1}', 0x2cb0), ('\u{2cb3}', 0x2cb2), ('\u{2cb5}', 0x2cb4), + ('\u{2cb7}', 0x2cb6), ('\u{2cb9}', 0x2cb8), ('\u{2cbb}', 0x2cba), ('\u{2cbd}', 0x2cbc), + ('\u{2cbf}', 0x2cbe), ('\u{2cc1}', 0x2cc0), ('\u{2cc3}', 0x2cc2), ('\u{2cc5}', 0x2cc4), + ('\u{2cc7}', 0x2cc6), ('\u{2cc9}', 0x2cc8), ('\u{2ccb}', 0x2cca), ('\u{2ccd}', 0x2ccc), + ('\u{2ccf}', 0x2cce), ('\u{2cd1}', 0x2cd0), ('\u{2cd3}', 0x2cd2), ('\u{2cd5}', 0x2cd4), + ('\u{2cd7}', 0x2cd6), ('\u{2cd9}', 0x2cd8), ('\u{2cdb}', 0x2cda), ('\u{2cdd}', 0x2cdc), + ('\u{2cdf}', 0x2cde), ('\u{2ce1}', 0x2ce0), ('\u{2ce3}', 0x2ce2), ('\u{2cec}', 0x2ceb), + ('\u{2cee}', 0x2ced), ('\u{2cf3}', 0x2cf2), ('\u{2d00}', 0x10a0), ('\u{2d01}', 0x10a1), + ('\u{2d02}', 0x10a2), ('\u{2d03}', 0x10a3), ('\u{2d04}', 0x10a4), ('\u{2d05}', 0x10a5), + ('\u{2d06}', 0x10a6), ('\u{2d07}', 0x10a7), ('\u{2d08}', 0x10a8), ('\u{2d09}', 0x10a9), + ('\u{2d0a}', 0x10aa), ('\u{2d0b}', 0x10ab), ('\u{2d0c}', 0x10ac), ('\u{2d0d}', 0x10ad), + ('\u{2d0e}', 0x10ae), ('\u{2d0f}', 0x10af), ('\u{2d10}', 0x10b0), ('\u{2d11}', 0x10b1), + ('\u{2d12}', 0x10b2), ('\u{2d13}', 0x10b3), ('\u{2d14}', 0x10b4), ('\u{2d15}', 0x10b5), + ('\u{2d16}', 0x10b6), ('\u{2d17}', 0x10b7), ('\u{2d18}', 0x10b8), ('\u{2d19}', 0x10b9), + ('\u{2d1a}', 0x10ba), ('\u{2d1b}', 0x10bb), ('\u{2d1c}', 0x10bc), ('\u{2d1d}', 0x10bd), + ('\u{2d1e}', 0x10be), ('\u{2d1f}', 0x10bf), ('\u{2d20}', 0x10c0), ('\u{2d21}', 0x10c1), + ('\u{2d22}', 0x10c2), ('\u{2d23}', 0x10c3), ('\u{2d24}', 0x10c4), ('\u{2d25}', 0x10c5), + ('\u{2d27}', 0x10c7), ('\u{2d2d}', 0x10cd), ('\u{a641}', 0xa640), ('\u{a643}', 0xa642), + ('\u{a645}', 0xa644), ('\u{a647}', 0xa646), ('\u{a649}', 0xa648), ('\u{a64b}', 0xa64a), + ('\u{a64d}', 0xa64c), ('\u{a64f}', 0xa64e), ('\u{a651}', 0xa650), ('\u{a653}', 0xa652), + ('\u{a655}', 0xa654), ('\u{a657}', 0xa656), ('\u{a659}', 0xa658), ('\u{a65b}', 0xa65a), + ('\u{a65d}', 0xa65c), ('\u{a65f}', 0xa65e), ('\u{a661}', 0xa660), ('\u{a663}', 0xa662), + ('\u{a665}', 0xa664), ('\u{a667}', 0xa666), ('\u{a669}', 0xa668), ('\u{a66b}', 0xa66a), + ('\u{a66d}', 0xa66c), ('\u{a681}', 0xa680), ('\u{a683}', 0xa682), ('\u{a685}', 0xa684), + ('\u{a687}', 0xa686), ('\u{a689}', 0xa688), ('\u{a68b}', 0xa68a), ('\u{a68d}', 0xa68c), + ('\u{a68f}', 0xa68e), ('\u{a691}', 0xa690), ('\u{a693}', 0xa692), ('\u{a695}', 0xa694), + ('\u{a697}', 0xa696), ('\u{a699}', 0xa698), ('\u{a69b}', 0xa69a), ('\u{a723}', 0xa722), + ('\u{a725}', 0xa724), ('\u{a727}', 0xa726), ('\u{a729}', 0xa728), ('\u{a72b}', 0xa72a), + ('\u{a72d}', 0xa72c), ('\u{a72f}', 0xa72e), ('\u{a733}', 0xa732), ('\u{a735}', 0xa734), + ('\u{a737}', 0xa736), ('\u{a739}', 0xa738), ('\u{a73b}', 0xa73a), ('\u{a73d}', 0xa73c), + ('\u{a73f}', 0xa73e), ('\u{a741}', 0xa740), ('\u{a743}', 
0xa742), ('\u{a745}', 0xa744), + ('\u{a747}', 0xa746), ('\u{a749}', 0xa748), ('\u{a74b}', 0xa74a), ('\u{a74d}', 0xa74c), + ('\u{a74f}', 0xa74e), ('\u{a751}', 0xa750), ('\u{a753}', 0xa752), ('\u{a755}', 0xa754), + ('\u{a757}', 0xa756), ('\u{a759}', 0xa758), ('\u{a75b}', 0xa75a), ('\u{a75d}', 0xa75c), + ('\u{a75f}', 0xa75e), ('\u{a761}', 0xa760), ('\u{a763}', 0xa762), ('\u{a765}', 0xa764), + ('\u{a767}', 0xa766), ('\u{a769}', 0xa768), ('\u{a76b}', 0xa76a), ('\u{a76d}', 0xa76c), + ('\u{a76f}', 0xa76e), ('\u{a77a}', 0xa779), ('\u{a77c}', 0xa77b), ('\u{a77f}', 0xa77e), + ('\u{a781}', 0xa780), ('\u{a783}', 0xa782), ('\u{a785}', 0xa784), ('\u{a787}', 0xa786), + ('\u{a78c}', 0xa78b), ('\u{a791}', 0xa790), ('\u{a793}', 0xa792), ('\u{a794}', 0xa7c4), + ('\u{a797}', 0xa796), ('\u{a799}', 0xa798), ('\u{a79b}', 0xa79a), ('\u{a79d}', 0xa79c), + ('\u{a79f}', 0xa79e), ('\u{a7a1}', 0xa7a0), ('\u{a7a3}', 0xa7a2), ('\u{a7a5}', 0xa7a4), + ('\u{a7a7}', 0xa7a6), ('\u{a7a9}', 0xa7a8), ('\u{a7b5}', 0xa7b4), ('\u{a7b7}', 0xa7b6), + ('\u{a7b9}', 0xa7b8), ('\u{a7bb}', 0xa7ba), ('\u{a7bd}', 0xa7bc), ('\u{a7bf}', 0xa7be), + ('\u{a7c1}', 0xa7c0), ('\u{a7c3}', 0xa7c2), ('\u{a7c8}', 0xa7c7), ('\u{a7ca}', 0xa7c9), + ('\u{a7cd}', 0xa7cc), ('\u{a7cf}', 0xa7ce), ('\u{a7d1}', 0xa7d0), ('\u{a7d3}', 0xa7d2), + ('\u{a7d5}', 0xa7d4), ('\u{a7d7}', 0xa7d6), ('\u{a7d9}', 0xa7d8), ('\u{a7db}', 0xa7da), + ('\u{a7f6}', 0xa7f5), ('\u{ab53}', 0xa7b3), ('\u{ab70}', 0x13a0), ('\u{ab71}', 0x13a1), + ('\u{ab72}', 0x13a2), ('\u{ab73}', 0x13a3), ('\u{ab74}', 0x13a4), ('\u{ab75}', 0x13a5), + ('\u{ab76}', 0x13a6), ('\u{ab77}', 0x13a7), ('\u{ab78}', 0x13a8), ('\u{ab79}', 0x13a9), + ('\u{ab7a}', 0x13aa), ('\u{ab7b}', 0x13ab), ('\u{ab7c}', 0x13ac), ('\u{ab7d}', 0x13ad), + ('\u{ab7e}', 0x13ae), ('\u{ab7f}', 0x13af), ('\u{ab80}', 0x13b0), ('\u{ab81}', 0x13b1), + ('\u{ab82}', 0x13b2), ('\u{ab83}', 0x13b3), ('\u{ab84}', 0x13b4), ('\u{ab85}', 0x13b5), + ('\u{ab86}', 0x13b6), ('\u{ab87}', 0x13b7), ('\u{ab88}', 0x13b8), ('\u{ab89}', 0x13b9), + ('\u{ab8a}', 0x13ba), ('\u{ab8b}', 0x13bb), ('\u{ab8c}', 0x13bc), ('\u{ab8d}', 0x13bd), + ('\u{ab8e}', 0x13be), ('\u{ab8f}', 0x13bf), ('\u{ab90}', 0x13c0), ('\u{ab91}', 0x13c1), + ('\u{ab92}', 0x13c2), ('\u{ab93}', 0x13c3), ('\u{ab94}', 0x13c4), ('\u{ab95}', 0x13c5), + ('\u{ab96}', 0x13c6), ('\u{ab97}', 0x13c7), ('\u{ab98}', 0x13c8), ('\u{ab99}', 0x13c9), + ('\u{ab9a}', 0x13ca), ('\u{ab9b}', 0x13cb), ('\u{ab9c}', 0x13cc), ('\u{ab9d}', 0x13cd), + ('\u{ab9e}', 0x13ce), ('\u{ab9f}', 0x13cf), ('\u{aba0}', 0x13d0), ('\u{aba1}', 0x13d1), + ('\u{aba2}', 0x13d2), ('\u{aba3}', 0x13d3), ('\u{aba4}', 0x13d4), ('\u{aba5}', 0x13d5), + ('\u{aba6}', 0x13d6), ('\u{aba7}', 0x13d7), ('\u{aba8}', 0x13d8), ('\u{aba9}', 0x13d9), + ('\u{abaa}', 0x13da), ('\u{abab}', 0x13db), ('\u{abac}', 0x13dc), ('\u{abad}', 0x13dd), + ('\u{abae}', 0x13de), ('\u{abaf}', 0x13df), ('\u{abb0}', 0x13e0), ('\u{abb1}', 0x13e1), + ('\u{abb2}', 0x13e2), ('\u{abb3}', 0x13e3), ('\u{abb4}', 0x13e4), ('\u{abb5}', 0x13e5), + ('\u{abb6}', 0x13e6), ('\u{abb7}', 0x13e7), ('\u{abb8}', 0x13e8), ('\u{abb9}', 0x13e9), + ('\u{abba}', 0x13ea), ('\u{abbb}', 0x13eb), ('\u{abbc}', 0x13ec), ('\u{abbd}', 0x13ed), + ('\u{abbe}', 0x13ee), ('\u{abbf}', 0x13ef), ('\u{fb00}', 0x40005a), ('\u{fb01}', 0x40005b), + ('\u{fb02}', 0x40005c), ('\u{fb03}', 0x40005d), ('\u{fb04}', 0x40005e), + ('\u{fb05}', 0x40005f), ('\u{fb06}', 0x400060), ('\u{fb13}', 0x400061), + ('\u{fb14}', 0x400062), ('\u{fb15}', 0x400063), ('\u{fb16}', 0x400064), + ('\u{fb17}', 0x400065), ('\u{ff41}', 0xff21), 
('\u{ff42}', 0xff22), ('\u{ff43}', 0xff23), + ('\u{ff44}', 0xff24), ('\u{ff45}', 0xff25), ('\u{ff46}', 0xff26), ('\u{ff47}', 0xff27), + ('\u{ff48}', 0xff28), ('\u{ff49}', 0xff29), ('\u{ff4a}', 0xff2a), ('\u{ff4b}', 0xff2b), + ('\u{ff4c}', 0xff2c), ('\u{ff4d}', 0xff2d), ('\u{ff4e}', 0xff2e), ('\u{ff4f}', 0xff2f), + ('\u{ff50}', 0xff30), ('\u{ff51}', 0xff31), ('\u{ff52}', 0xff32), ('\u{ff53}', 0xff33), + ('\u{ff54}', 0xff34), ('\u{ff55}', 0xff35), ('\u{ff56}', 0xff36), ('\u{ff57}', 0xff37), + ('\u{ff58}', 0xff38), ('\u{ff59}', 0xff39), ('\u{ff5a}', 0xff3a), ('\u{10428}', 0x10400), + ('\u{10429}', 0x10401), ('\u{1042a}', 0x10402), ('\u{1042b}', 0x10403), + ('\u{1042c}', 0x10404), ('\u{1042d}', 0x10405), ('\u{1042e}', 0x10406), + ('\u{1042f}', 0x10407), ('\u{10430}', 0x10408), ('\u{10431}', 0x10409), + ('\u{10432}', 0x1040a), ('\u{10433}', 0x1040b), ('\u{10434}', 0x1040c), + ('\u{10435}', 0x1040d), ('\u{10436}', 0x1040e), ('\u{10437}', 0x1040f), + ('\u{10438}', 0x10410), ('\u{10439}', 0x10411), ('\u{1043a}', 0x10412), + ('\u{1043b}', 0x10413), ('\u{1043c}', 0x10414), ('\u{1043d}', 0x10415), + ('\u{1043e}', 0x10416), ('\u{1043f}', 0x10417), ('\u{10440}', 0x10418), + ('\u{10441}', 0x10419), ('\u{10442}', 0x1041a), ('\u{10443}', 0x1041b), + ('\u{10444}', 0x1041c), ('\u{10445}', 0x1041d), ('\u{10446}', 0x1041e), + ('\u{10447}', 0x1041f), ('\u{10448}', 0x10420), ('\u{10449}', 0x10421), + ('\u{1044a}', 0x10422), ('\u{1044b}', 0x10423), ('\u{1044c}', 0x10424), + ('\u{1044d}', 0x10425), ('\u{1044e}', 0x10426), ('\u{1044f}', 0x10427), + ('\u{104d8}', 0x104b0), ('\u{104d9}', 0x104b1), ('\u{104da}', 0x104b2), + ('\u{104db}', 0x104b3), ('\u{104dc}', 0x104b4), ('\u{104dd}', 0x104b5), + ('\u{104de}', 0x104b6), ('\u{104df}', 0x104b7), ('\u{104e0}', 0x104b8), + ('\u{104e1}', 0x104b9), ('\u{104e2}', 0x104ba), ('\u{104e3}', 0x104bb), + ('\u{104e4}', 0x104bc), ('\u{104e5}', 0x104bd), ('\u{104e6}', 0x104be), + ('\u{104e7}', 0x104bf), ('\u{104e8}', 0x104c0), ('\u{104e9}', 0x104c1), + ('\u{104ea}', 0x104c2), ('\u{104eb}', 0x104c3), ('\u{104ec}', 0x104c4), + ('\u{104ed}', 0x104c5), ('\u{104ee}', 0x104c6), ('\u{104ef}', 0x104c7), + ('\u{104f0}', 0x104c8), ('\u{104f1}', 0x104c9), ('\u{104f2}', 0x104ca), + ('\u{104f3}', 0x104cb), ('\u{104f4}', 0x104cc), ('\u{104f5}', 0x104cd), + ('\u{104f6}', 0x104ce), ('\u{104f7}', 0x104cf), ('\u{104f8}', 0x104d0), + ('\u{104f9}', 0x104d1), ('\u{104fa}', 0x104d2), ('\u{104fb}', 0x104d3), + ('\u{10597}', 0x10570), ('\u{10598}', 0x10571), ('\u{10599}', 0x10572), + ('\u{1059a}', 0x10573), ('\u{1059b}', 0x10574), ('\u{1059c}', 0x10575), + ('\u{1059d}', 0x10576), ('\u{1059e}', 0x10577), ('\u{1059f}', 0x10578), + ('\u{105a0}', 0x10579), ('\u{105a1}', 0x1057a), ('\u{105a3}', 0x1057c), + ('\u{105a4}', 0x1057d), ('\u{105a5}', 0x1057e), ('\u{105a6}', 0x1057f), + ('\u{105a7}', 0x10580), ('\u{105a8}', 0x10581), ('\u{105a9}', 0x10582), + ('\u{105aa}', 0x10583), ('\u{105ab}', 0x10584), ('\u{105ac}', 0x10585), + ('\u{105ad}', 0x10586), ('\u{105ae}', 0x10587), ('\u{105af}', 0x10588), + ('\u{105b0}', 0x10589), ('\u{105b1}', 0x1058a), ('\u{105b3}', 0x1058c), + ('\u{105b4}', 0x1058d), ('\u{105b5}', 0x1058e), ('\u{105b6}', 0x1058f), + ('\u{105b7}', 0x10590), ('\u{105b8}', 0x10591), ('\u{105b9}', 0x10592), + ('\u{105bb}', 0x10594), ('\u{105bc}', 0x10595), ('\u{10cc0}', 0x10c80), + ('\u{10cc1}', 0x10c81), ('\u{10cc2}', 0x10c82), ('\u{10cc3}', 0x10c83), + ('\u{10cc4}', 0x10c84), ('\u{10cc5}', 0x10c85), ('\u{10cc6}', 0x10c86), + ('\u{10cc7}', 0x10c87), ('\u{10cc8}', 0x10c88), ('\u{10cc9}', 0x10c89), + 
('\u{10cca}', 0x10c8a), ('\u{10ccb}', 0x10c8b), ('\u{10ccc}', 0x10c8c), + ('\u{10ccd}', 0x10c8d), ('\u{10cce}', 0x10c8e), ('\u{10ccf}', 0x10c8f), + ('\u{10cd0}', 0x10c90), ('\u{10cd1}', 0x10c91), ('\u{10cd2}', 0x10c92), + ('\u{10cd3}', 0x10c93), ('\u{10cd4}', 0x10c94), ('\u{10cd5}', 0x10c95), + ('\u{10cd6}', 0x10c96), ('\u{10cd7}', 0x10c97), ('\u{10cd8}', 0x10c98), + ('\u{10cd9}', 0x10c99), ('\u{10cda}', 0x10c9a), ('\u{10cdb}', 0x10c9b), + ('\u{10cdc}', 0x10c9c), ('\u{10cdd}', 0x10c9d), ('\u{10cde}', 0x10c9e), + ('\u{10cdf}', 0x10c9f), ('\u{10ce0}', 0x10ca0), ('\u{10ce1}', 0x10ca1), + ('\u{10ce2}', 0x10ca2), ('\u{10ce3}', 0x10ca3), ('\u{10ce4}', 0x10ca4), + ('\u{10ce5}', 0x10ca5), ('\u{10ce6}', 0x10ca6), ('\u{10ce7}', 0x10ca7), + ('\u{10ce8}', 0x10ca8), ('\u{10ce9}', 0x10ca9), ('\u{10cea}', 0x10caa), + ('\u{10ceb}', 0x10cab), ('\u{10cec}', 0x10cac), ('\u{10ced}', 0x10cad), + ('\u{10cee}', 0x10cae), ('\u{10cef}', 0x10caf), ('\u{10cf0}', 0x10cb0), + ('\u{10cf1}', 0x10cb1), ('\u{10cf2}', 0x10cb2), ('\u{10d70}', 0x10d50), + ('\u{10d71}', 0x10d51), ('\u{10d72}', 0x10d52), ('\u{10d73}', 0x10d53), + ('\u{10d74}', 0x10d54), ('\u{10d75}', 0x10d55), ('\u{10d76}', 0x10d56), + ('\u{10d77}', 0x10d57), ('\u{10d78}', 0x10d58), ('\u{10d79}', 0x10d59), + ('\u{10d7a}', 0x10d5a), ('\u{10d7b}', 0x10d5b), ('\u{10d7c}', 0x10d5c), + ('\u{10d7d}', 0x10d5d), ('\u{10d7e}', 0x10d5e), ('\u{10d7f}', 0x10d5f), + ('\u{10d80}', 0x10d60), ('\u{10d81}', 0x10d61), ('\u{10d82}', 0x10d62), + ('\u{10d83}', 0x10d63), ('\u{10d84}', 0x10d64), ('\u{10d85}', 0x10d65), + ('\u{118c0}', 0x118a0), ('\u{118c1}', 0x118a1), ('\u{118c2}', 0x118a2), + ('\u{118c3}', 0x118a3), ('\u{118c4}', 0x118a4), ('\u{118c5}', 0x118a5), + ('\u{118c6}', 0x118a6), ('\u{118c7}', 0x118a7), ('\u{118c8}', 0x118a8), + ('\u{118c9}', 0x118a9), ('\u{118ca}', 0x118aa), ('\u{118cb}', 0x118ab), + ('\u{118cc}', 0x118ac), ('\u{118cd}', 0x118ad), ('\u{118ce}', 0x118ae), + ('\u{118cf}', 0x118af), ('\u{118d0}', 0x118b0), ('\u{118d1}', 0x118b1), + ('\u{118d2}', 0x118b2), ('\u{118d3}', 0x118b3), ('\u{118d4}', 0x118b4), + ('\u{118d5}', 0x118b5), ('\u{118d6}', 0x118b6), ('\u{118d7}', 0x118b7), + ('\u{118d8}', 0x118b8), ('\u{118d9}', 0x118b9), ('\u{118da}', 0x118ba), + ('\u{118db}', 0x118bb), ('\u{118dc}', 0x118bc), ('\u{118dd}', 0x118bd), + ('\u{118de}', 0x118be), ('\u{118df}', 0x118bf), ('\u{16e60}', 0x16e40), + ('\u{16e61}', 0x16e41), ('\u{16e62}', 0x16e42), ('\u{16e63}', 0x16e43), + ('\u{16e64}', 0x16e44), ('\u{16e65}', 0x16e45), ('\u{16e66}', 0x16e46), + ('\u{16e67}', 0x16e47), ('\u{16e68}', 0x16e48), ('\u{16e69}', 0x16e49), + ('\u{16e6a}', 0x16e4a), ('\u{16e6b}', 0x16e4b), ('\u{16e6c}', 0x16e4c), + ('\u{16e6d}', 0x16e4d), ('\u{16e6e}', 0x16e4e), ('\u{16e6f}', 0x16e4f), + ('\u{16e70}', 0x16e50), ('\u{16e71}', 0x16e51), ('\u{16e72}', 0x16e52), + ('\u{16e73}', 0x16e53), ('\u{16e74}', 0x16e54), ('\u{16e75}', 0x16e55), + ('\u{16e76}', 0x16e56), ('\u{16e77}', 0x16e57), ('\u{16e78}', 0x16e58), + ('\u{16e79}', 0x16e59), ('\u{16e7a}', 0x16e5a), ('\u{16e7b}', 0x16e5b), + ('\u{16e7c}', 0x16e5c), ('\u{16e7d}', 0x16e5d), ('\u{16e7e}', 0x16e5e), + ('\u{16e7f}', 0x16e5f), ('\u{16ebb}', 0x16ea0), ('\u{16ebc}', 0x16ea1), + ('\u{16ebd}', 0x16ea2), ('\u{16ebe}', 0x16ea3), ('\u{16ebf}', 0x16ea4), + ('\u{16ec0}', 0x16ea5), ('\u{16ec1}', 0x16ea6), ('\u{16ec2}', 0x16ea7), + ('\u{16ec3}', 0x16ea8), ('\u{16ec4}', 0x16ea9), ('\u{16ec5}', 0x16eaa), + ('\u{16ec6}', 0x16eab), ('\u{16ec7}', 0x16eac), ('\u{16ec8}', 0x16ead), + ('\u{16ec9}', 0x16eae), ('\u{16eca}', 0x16eaf), ('\u{16ecb}', 0x16eb0), + 
('\u{16ecc}', 0x16eb1), ('\u{16ecd}', 0x16eb2), ('\u{16ece}', 0x16eb3), + ('\u{16ecf}', 0x16eb4), ('\u{16ed0}', 0x16eb5), ('\u{16ed1}', 0x16eb6), + ('\u{16ed2}', 0x16eb7), ('\u{16ed3}', 0x16eb8), ('\u{1e922}', 0x1e900), + ('\u{1e923}', 0x1e901), ('\u{1e924}', 0x1e902), ('\u{1e925}', 0x1e903), + ('\u{1e926}', 0x1e904), ('\u{1e927}', 0x1e905), ('\u{1e928}', 0x1e906), + ('\u{1e929}', 0x1e907), ('\u{1e92a}', 0x1e908), ('\u{1e92b}', 0x1e909), + ('\u{1e92c}', 0x1e90a), ('\u{1e92d}', 0x1e90b), ('\u{1e92e}', 0x1e90c), + ('\u{1e92f}', 0x1e90d), ('\u{1e930}', 0x1e90e), ('\u{1e931}', 0x1e90f), + ('\u{1e932}', 0x1e910), ('\u{1e933}', 0x1e911), ('\u{1e934}', 0x1e912), + ('\u{1e935}', 0x1e913), ('\u{1e936}', 0x1e914), ('\u{1e937}', 0x1e915), + ('\u{1e938}', 0x1e916), ('\u{1e939}', 0x1e917), ('\u{1e93a}', 0x1e918), + ('\u{1e93b}', 0x1e919), ('\u{1e93c}', 0x1e91a), ('\u{1e93d}', 0x1e91b), + ('\u{1e93e}', 0x1e91c), ('\u{1e93f}', 0x1e91d), ('\u{1e940}', 0x1e91e), + ('\u{1e941}', 0x1e91f), ('\u{1e942}', 0x1e920), ('\u{1e943}', 0x1e921), ]; #[rustfmt::skip] - static LOWERCASE_TABLE_MULTI: &[[char; 3]; 1] = &[ - ['i', '\u{307}', '\u{0}'], + static UPPERCASE_TABLE_MULTI: &[[char; 3]; 102] = &[ + ['\u{53}', '\u{53}', '\u{0}'], ['\u{2bc}', '\u{4e}', '\u{0}'], + ['\u{4a}', '\u{30c}', '\u{0}'], ['\u{399}', '\u{308}', '\u{301}'], + ['\u{3a5}', '\u{308}', '\u{301}'], ['\u{535}', '\u{552}', '\u{0}'], + ['\u{48}', '\u{331}', '\u{0}'], ['\u{54}', '\u{308}', '\u{0}'], + ['\u{57}', '\u{30a}', '\u{0}'], ['\u{59}', '\u{30a}', '\u{0}'], + ['\u{41}', '\u{2be}', '\u{0}'], ['\u{3a5}', '\u{313}', '\u{0}'], + ['\u{3a5}', '\u{313}', '\u{300}'], ['\u{3a5}', '\u{313}', '\u{301}'], + ['\u{3a5}', '\u{313}', '\u{342}'], ['\u{1f08}', '\u{399}', '\u{0}'], + ['\u{1f09}', '\u{399}', '\u{0}'], ['\u{1f0a}', '\u{399}', '\u{0}'], + ['\u{1f0b}', '\u{399}', '\u{0}'], ['\u{1f0c}', '\u{399}', '\u{0}'], + ['\u{1f0d}', '\u{399}', '\u{0}'], ['\u{1f0e}', '\u{399}', '\u{0}'], + ['\u{1f0f}', '\u{399}', '\u{0}'], ['\u{1f08}', '\u{399}', '\u{0}'], + ['\u{1f09}', '\u{399}', '\u{0}'], ['\u{1f0a}', '\u{399}', '\u{0}'], + ['\u{1f0b}', '\u{399}', '\u{0}'], ['\u{1f0c}', '\u{399}', '\u{0}'], + ['\u{1f0d}', '\u{399}', '\u{0}'], ['\u{1f0e}', '\u{399}', '\u{0}'], + ['\u{1f0f}', '\u{399}', '\u{0}'], ['\u{1f28}', '\u{399}', '\u{0}'], + ['\u{1f29}', '\u{399}', '\u{0}'], ['\u{1f2a}', '\u{399}', '\u{0}'], + ['\u{1f2b}', '\u{399}', '\u{0}'], ['\u{1f2c}', '\u{399}', '\u{0}'], + ['\u{1f2d}', '\u{399}', '\u{0}'], ['\u{1f2e}', '\u{399}', '\u{0}'], + ['\u{1f2f}', '\u{399}', '\u{0}'], ['\u{1f28}', '\u{399}', '\u{0}'], + ['\u{1f29}', '\u{399}', '\u{0}'], ['\u{1f2a}', '\u{399}', '\u{0}'], + ['\u{1f2b}', '\u{399}', '\u{0}'], ['\u{1f2c}', '\u{399}', '\u{0}'], + ['\u{1f2d}', '\u{399}', '\u{0}'], ['\u{1f2e}', '\u{399}', '\u{0}'], + ['\u{1f2f}', '\u{399}', '\u{0}'], ['\u{1f68}', '\u{399}', '\u{0}'], + ['\u{1f69}', '\u{399}', '\u{0}'], ['\u{1f6a}', '\u{399}', '\u{0}'], + ['\u{1f6b}', '\u{399}', '\u{0}'], ['\u{1f6c}', '\u{399}', '\u{0}'], + ['\u{1f6d}', '\u{399}', '\u{0}'], ['\u{1f6e}', '\u{399}', '\u{0}'], + ['\u{1f6f}', '\u{399}', '\u{0}'], ['\u{1f68}', '\u{399}', '\u{0}'], + ['\u{1f69}', '\u{399}', '\u{0}'], ['\u{1f6a}', '\u{399}', '\u{0}'], + ['\u{1f6b}', '\u{399}', '\u{0}'], ['\u{1f6c}', '\u{399}', '\u{0}'], + ['\u{1f6d}', '\u{399}', '\u{0}'], ['\u{1f6e}', '\u{399}', '\u{0}'], + ['\u{1f6f}', '\u{399}', '\u{0}'], ['\u{1fba}', '\u{399}', '\u{0}'], + ['\u{391}', '\u{399}', '\u{0}'], ['\u{386}', '\u{399}', '\u{0}'], + ['\u{391}', '\u{342}', '\u{0}'], ['\u{391}', '\u{342}', '\u{399}'], 
+ ['\u{391}', '\u{399}', '\u{0}'], ['\u{1fca}', '\u{399}', '\u{0}'], + ['\u{397}', '\u{399}', '\u{0}'], ['\u{389}', '\u{399}', '\u{0}'], + ['\u{397}', '\u{342}', '\u{0}'], ['\u{397}', '\u{342}', '\u{399}'], + ['\u{397}', '\u{399}', '\u{0}'], ['\u{399}', '\u{308}', '\u{300}'], + ['\u{399}', '\u{308}', '\u{301}'], ['\u{399}', '\u{342}', '\u{0}'], + ['\u{399}', '\u{308}', '\u{342}'], ['\u{3a5}', '\u{308}', '\u{300}'], + ['\u{3a5}', '\u{308}', '\u{301}'], ['\u{3a1}', '\u{313}', '\u{0}'], + ['\u{3a5}', '\u{342}', '\u{0}'], ['\u{3a5}', '\u{308}', '\u{342}'], + ['\u{1ffa}', '\u{399}', '\u{0}'], ['\u{3a9}', '\u{399}', '\u{0}'], + ['\u{38f}', '\u{399}', '\u{0}'], ['\u{3a9}', '\u{342}', '\u{0}'], + ['\u{3a9}', '\u{342}', '\u{399}'], ['\u{3a9}', '\u{399}', '\u{0}'], + ['\u{46}', '\u{46}', '\u{0}'], ['\u{46}', '\u{49}', '\u{0}'], ['\u{46}', '\u{4c}', '\u{0}'], + ['\u{46}', '\u{46}', '\u{49}'], ['\u{46}', '\u{46}', '\u{4c}'], + ['\u{53}', '\u{54}', '\u{0}'], ['\u{53}', '\u{54}', '\u{0}'], + ['\u{544}', '\u{546}', '\u{0}'], ['\u{544}', '\u{535}', '\u{0}'], + ['\u{544}', '\u{53b}', '\u{0}'], ['\u{54e}', '\u{546}', '\u{0}'], + ['\u{544}', '\u{53d}', '\u{0}'], ]; #[inline] @@ -1170,436 +1692,4 @@ pub mod conversions { ) } } - #[rustfmt::skip] - static UPPERCASE_TABLE: &[(char, u32); 1554] = &[ - ('\u{b5}', 924), ('\u{df}', 4194304), ('\u{e0}', 192), ('\u{e1}', 193), ('\u{e2}', 194), - ('\u{e3}', 195), ('\u{e4}', 196), ('\u{e5}', 197), ('\u{e6}', 198), ('\u{e7}', 199), - ('\u{e8}', 200), ('\u{e9}', 201), ('\u{ea}', 202), ('\u{eb}', 203), ('\u{ec}', 204), - ('\u{ed}', 205), ('\u{ee}', 206), ('\u{ef}', 207), ('\u{f0}', 208), ('\u{f1}', 209), - ('\u{f2}', 210), ('\u{f3}', 211), ('\u{f4}', 212), ('\u{f5}', 213), ('\u{f6}', 214), - ('\u{f8}', 216), ('\u{f9}', 217), ('\u{fa}', 218), ('\u{fb}', 219), ('\u{fc}', 220), - ('\u{fd}', 221), ('\u{fe}', 222), ('\u{ff}', 376), ('\u{101}', 256), ('\u{103}', 258), - ('\u{105}', 260), ('\u{107}', 262), ('\u{109}', 264), ('\u{10b}', 266), ('\u{10d}', 268), - ('\u{10f}', 270), ('\u{111}', 272), ('\u{113}', 274), ('\u{115}', 276), ('\u{117}', 278), - ('\u{119}', 280), ('\u{11b}', 282), ('\u{11d}', 284), ('\u{11f}', 286), ('\u{121}', 288), - ('\u{123}', 290), ('\u{125}', 292), ('\u{127}', 294), ('\u{129}', 296), ('\u{12b}', 298), - ('\u{12d}', 300), ('\u{12f}', 302), ('\u{131}', 73), ('\u{133}', 306), ('\u{135}', 308), - ('\u{137}', 310), ('\u{13a}', 313), ('\u{13c}', 315), ('\u{13e}', 317), ('\u{140}', 319), - ('\u{142}', 321), ('\u{144}', 323), ('\u{146}', 325), ('\u{148}', 327), - ('\u{149}', 4194305), ('\u{14b}', 330), ('\u{14d}', 332), ('\u{14f}', 334), - ('\u{151}', 336), ('\u{153}', 338), ('\u{155}', 340), ('\u{157}', 342), ('\u{159}', 344), - ('\u{15b}', 346), ('\u{15d}', 348), ('\u{15f}', 350), ('\u{161}', 352), ('\u{163}', 354), - ('\u{165}', 356), ('\u{167}', 358), ('\u{169}', 360), ('\u{16b}', 362), ('\u{16d}', 364), - ('\u{16f}', 366), ('\u{171}', 368), ('\u{173}', 370), ('\u{175}', 372), ('\u{177}', 374), - ('\u{17a}', 377), ('\u{17c}', 379), ('\u{17e}', 381), ('\u{17f}', 83), ('\u{180}', 579), - ('\u{183}', 386), ('\u{185}', 388), ('\u{188}', 391), ('\u{18c}', 395), ('\u{192}', 401), - ('\u{195}', 502), ('\u{199}', 408), ('\u{19a}', 573), ('\u{19b}', 42972), ('\u{19e}', 544), - ('\u{1a1}', 416), ('\u{1a3}', 418), ('\u{1a5}', 420), ('\u{1a8}', 423), ('\u{1ad}', 428), - ('\u{1b0}', 431), ('\u{1b4}', 435), ('\u{1b6}', 437), ('\u{1b9}', 440), ('\u{1bd}', 444), - ('\u{1bf}', 503), ('\u{1c5}', 452), ('\u{1c6}', 452), ('\u{1c8}', 455), ('\u{1c9}', 455), - ('\u{1cb}', 
458), ('\u{1cc}', 458), ('\u{1ce}', 461), ('\u{1d0}', 463), ('\u{1d2}', 465), - ('\u{1d4}', 467), ('\u{1d6}', 469), ('\u{1d8}', 471), ('\u{1da}', 473), ('\u{1dc}', 475), - ('\u{1dd}', 398), ('\u{1df}', 478), ('\u{1e1}', 480), ('\u{1e3}', 482), ('\u{1e5}', 484), - ('\u{1e7}', 486), ('\u{1e9}', 488), ('\u{1eb}', 490), ('\u{1ed}', 492), ('\u{1ef}', 494), - ('\u{1f0}', 4194306), ('\u{1f2}', 497), ('\u{1f3}', 497), ('\u{1f5}', 500), - ('\u{1f9}', 504), ('\u{1fb}', 506), ('\u{1fd}', 508), ('\u{1ff}', 510), ('\u{201}', 512), - ('\u{203}', 514), ('\u{205}', 516), ('\u{207}', 518), ('\u{209}', 520), ('\u{20b}', 522), - ('\u{20d}', 524), ('\u{20f}', 526), ('\u{211}', 528), ('\u{213}', 530), ('\u{215}', 532), - ('\u{217}', 534), ('\u{219}', 536), ('\u{21b}', 538), ('\u{21d}', 540), ('\u{21f}', 542), - ('\u{223}', 546), ('\u{225}', 548), ('\u{227}', 550), ('\u{229}', 552), ('\u{22b}', 554), - ('\u{22d}', 556), ('\u{22f}', 558), ('\u{231}', 560), ('\u{233}', 562), ('\u{23c}', 571), - ('\u{23f}', 11390), ('\u{240}', 11391), ('\u{242}', 577), ('\u{247}', 582), - ('\u{249}', 584), ('\u{24b}', 586), ('\u{24d}', 588), ('\u{24f}', 590), ('\u{250}', 11375), - ('\u{251}', 11373), ('\u{252}', 11376), ('\u{253}', 385), ('\u{254}', 390), - ('\u{256}', 393), ('\u{257}', 394), ('\u{259}', 399), ('\u{25b}', 400), ('\u{25c}', 42923), - ('\u{260}', 403), ('\u{261}', 42924), ('\u{263}', 404), ('\u{264}', 42955), - ('\u{265}', 42893), ('\u{266}', 42922), ('\u{268}', 407), ('\u{269}', 406), - ('\u{26a}', 42926), ('\u{26b}', 11362), ('\u{26c}', 42925), ('\u{26f}', 412), - ('\u{271}', 11374), ('\u{272}', 413), ('\u{275}', 415), ('\u{27d}', 11364), - ('\u{280}', 422), ('\u{282}', 42949), ('\u{283}', 425), ('\u{287}', 42929), - ('\u{288}', 430), ('\u{289}', 580), ('\u{28a}', 433), ('\u{28b}', 434), ('\u{28c}', 581), - ('\u{292}', 439), ('\u{29d}', 42930), ('\u{29e}', 42928), ('\u{345}', 921), - ('\u{371}', 880), ('\u{373}', 882), ('\u{377}', 886), ('\u{37b}', 1021), ('\u{37c}', 1022), - ('\u{37d}', 1023), ('\u{390}', 4194307), ('\u{3ac}', 902), ('\u{3ad}', 904), - ('\u{3ae}', 905), ('\u{3af}', 906), ('\u{3b0}', 4194308), ('\u{3b1}', 913), - ('\u{3b2}', 914), ('\u{3b3}', 915), ('\u{3b4}', 916), ('\u{3b5}', 917), ('\u{3b6}', 918), - ('\u{3b7}', 919), ('\u{3b8}', 920), ('\u{3b9}', 921), ('\u{3ba}', 922), ('\u{3bb}', 923), - ('\u{3bc}', 924), ('\u{3bd}', 925), ('\u{3be}', 926), ('\u{3bf}', 927), ('\u{3c0}', 928), - ('\u{3c1}', 929), ('\u{3c2}', 931), ('\u{3c3}', 931), ('\u{3c4}', 932), ('\u{3c5}', 933), - ('\u{3c6}', 934), ('\u{3c7}', 935), ('\u{3c8}', 936), ('\u{3c9}', 937), ('\u{3ca}', 938), - ('\u{3cb}', 939), ('\u{3cc}', 908), ('\u{3cd}', 910), ('\u{3ce}', 911), ('\u{3d0}', 914), - ('\u{3d1}', 920), ('\u{3d5}', 934), ('\u{3d6}', 928), ('\u{3d7}', 975), ('\u{3d9}', 984), - ('\u{3db}', 986), ('\u{3dd}', 988), ('\u{3df}', 990), ('\u{3e1}', 992), ('\u{3e3}', 994), - ('\u{3e5}', 996), ('\u{3e7}', 998), ('\u{3e9}', 1000), ('\u{3eb}', 1002), ('\u{3ed}', 1004), - ('\u{3ef}', 1006), ('\u{3f0}', 922), ('\u{3f1}', 929), ('\u{3f2}', 1017), ('\u{3f3}', 895), - ('\u{3f5}', 917), ('\u{3f8}', 1015), ('\u{3fb}', 1018), ('\u{430}', 1040), - ('\u{431}', 1041), ('\u{432}', 1042), ('\u{433}', 1043), ('\u{434}', 1044), - ('\u{435}', 1045), ('\u{436}', 1046), ('\u{437}', 1047), ('\u{438}', 1048), - ('\u{439}', 1049), ('\u{43a}', 1050), ('\u{43b}', 1051), ('\u{43c}', 1052), - ('\u{43d}', 1053), ('\u{43e}', 1054), ('\u{43f}', 1055), ('\u{440}', 1056), - ('\u{441}', 1057), ('\u{442}', 1058), ('\u{443}', 1059), ('\u{444}', 1060), - ('\u{445}', 1061), 
('\u{446}', 1062), ('\u{447}', 1063), ('\u{448}', 1064), - ('\u{449}', 1065), ('\u{44a}', 1066), ('\u{44b}', 1067), ('\u{44c}', 1068), - ('\u{44d}', 1069), ('\u{44e}', 1070), ('\u{44f}', 1071), ('\u{450}', 1024), - ('\u{451}', 1025), ('\u{452}', 1026), ('\u{453}', 1027), ('\u{454}', 1028), - ('\u{455}', 1029), ('\u{456}', 1030), ('\u{457}', 1031), ('\u{458}', 1032), - ('\u{459}', 1033), ('\u{45a}', 1034), ('\u{45b}', 1035), ('\u{45c}', 1036), - ('\u{45d}', 1037), ('\u{45e}', 1038), ('\u{45f}', 1039), ('\u{461}', 1120), - ('\u{463}', 1122), ('\u{465}', 1124), ('\u{467}', 1126), ('\u{469}', 1128), - ('\u{46b}', 1130), ('\u{46d}', 1132), ('\u{46f}', 1134), ('\u{471}', 1136), - ('\u{473}', 1138), ('\u{475}', 1140), ('\u{477}', 1142), ('\u{479}', 1144), - ('\u{47b}', 1146), ('\u{47d}', 1148), ('\u{47f}', 1150), ('\u{481}', 1152), - ('\u{48b}', 1162), ('\u{48d}', 1164), ('\u{48f}', 1166), ('\u{491}', 1168), - ('\u{493}', 1170), ('\u{495}', 1172), ('\u{497}', 1174), ('\u{499}', 1176), - ('\u{49b}', 1178), ('\u{49d}', 1180), ('\u{49f}', 1182), ('\u{4a1}', 1184), - ('\u{4a3}', 1186), ('\u{4a5}', 1188), ('\u{4a7}', 1190), ('\u{4a9}', 1192), - ('\u{4ab}', 1194), ('\u{4ad}', 1196), ('\u{4af}', 1198), ('\u{4b1}', 1200), - ('\u{4b3}', 1202), ('\u{4b5}', 1204), ('\u{4b7}', 1206), ('\u{4b9}', 1208), - ('\u{4bb}', 1210), ('\u{4bd}', 1212), ('\u{4bf}', 1214), ('\u{4c2}', 1217), - ('\u{4c4}', 1219), ('\u{4c6}', 1221), ('\u{4c8}', 1223), ('\u{4ca}', 1225), - ('\u{4cc}', 1227), ('\u{4ce}', 1229), ('\u{4cf}', 1216), ('\u{4d1}', 1232), - ('\u{4d3}', 1234), ('\u{4d5}', 1236), ('\u{4d7}', 1238), ('\u{4d9}', 1240), - ('\u{4db}', 1242), ('\u{4dd}', 1244), ('\u{4df}', 1246), ('\u{4e1}', 1248), - ('\u{4e3}', 1250), ('\u{4e5}', 1252), ('\u{4e7}', 1254), ('\u{4e9}', 1256), - ('\u{4eb}', 1258), ('\u{4ed}', 1260), ('\u{4ef}', 1262), ('\u{4f1}', 1264), - ('\u{4f3}', 1266), ('\u{4f5}', 1268), ('\u{4f7}', 1270), ('\u{4f9}', 1272), - ('\u{4fb}', 1274), ('\u{4fd}', 1276), ('\u{4ff}', 1278), ('\u{501}', 1280), - ('\u{503}', 1282), ('\u{505}', 1284), ('\u{507}', 1286), ('\u{509}', 1288), - ('\u{50b}', 1290), ('\u{50d}', 1292), ('\u{50f}', 1294), ('\u{511}', 1296), - ('\u{513}', 1298), ('\u{515}', 1300), ('\u{517}', 1302), ('\u{519}', 1304), - ('\u{51b}', 1306), ('\u{51d}', 1308), ('\u{51f}', 1310), ('\u{521}', 1312), - ('\u{523}', 1314), ('\u{525}', 1316), ('\u{527}', 1318), ('\u{529}', 1320), - ('\u{52b}', 1322), ('\u{52d}', 1324), ('\u{52f}', 1326), ('\u{561}', 1329), - ('\u{562}', 1330), ('\u{563}', 1331), ('\u{564}', 1332), ('\u{565}', 1333), - ('\u{566}', 1334), ('\u{567}', 1335), ('\u{568}', 1336), ('\u{569}', 1337), - ('\u{56a}', 1338), ('\u{56b}', 1339), ('\u{56c}', 1340), ('\u{56d}', 1341), - ('\u{56e}', 1342), ('\u{56f}', 1343), ('\u{570}', 1344), ('\u{571}', 1345), - ('\u{572}', 1346), ('\u{573}', 1347), ('\u{574}', 1348), ('\u{575}', 1349), - ('\u{576}', 1350), ('\u{577}', 1351), ('\u{578}', 1352), ('\u{579}', 1353), - ('\u{57a}', 1354), ('\u{57b}', 1355), ('\u{57c}', 1356), ('\u{57d}', 1357), - ('\u{57e}', 1358), ('\u{57f}', 1359), ('\u{580}', 1360), ('\u{581}', 1361), - ('\u{582}', 1362), ('\u{583}', 1363), ('\u{584}', 1364), ('\u{585}', 1365), - ('\u{586}', 1366), ('\u{587}', 4194309), ('\u{10d0}', 7312), ('\u{10d1}', 7313), - ('\u{10d2}', 7314), ('\u{10d3}', 7315), ('\u{10d4}', 7316), ('\u{10d5}', 7317), - ('\u{10d6}', 7318), ('\u{10d7}', 7319), ('\u{10d8}', 7320), ('\u{10d9}', 7321), - ('\u{10da}', 7322), ('\u{10db}', 7323), ('\u{10dc}', 7324), ('\u{10dd}', 7325), - ('\u{10de}', 7326), ('\u{10df}', 7327), 
('\u{10e0}', 7328), ('\u{10e1}', 7329), - ('\u{10e2}', 7330), ('\u{10e3}', 7331), ('\u{10e4}', 7332), ('\u{10e5}', 7333), - ('\u{10e6}', 7334), ('\u{10e7}', 7335), ('\u{10e8}', 7336), ('\u{10e9}', 7337), - ('\u{10ea}', 7338), ('\u{10eb}', 7339), ('\u{10ec}', 7340), ('\u{10ed}', 7341), - ('\u{10ee}', 7342), ('\u{10ef}', 7343), ('\u{10f0}', 7344), ('\u{10f1}', 7345), - ('\u{10f2}', 7346), ('\u{10f3}', 7347), ('\u{10f4}', 7348), ('\u{10f5}', 7349), - ('\u{10f6}', 7350), ('\u{10f7}', 7351), ('\u{10f8}', 7352), ('\u{10f9}', 7353), - ('\u{10fa}', 7354), ('\u{10fd}', 7357), ('\u{10fe}', 7358), ('\u{10ff}', 7359), - ('\u{13f8}', 5104), ('\u{13f9}', 5105), ('\u{13fa}', 5106), ('\u{13fb}', 5107), - ('\u{13fc}', 5108), ('\u{13fd}', 5109), ('\u{1c80}', 1042), ('\u{1c81}', 1044), - ('\u{1c82}', 1054), ('\u{1c83}', 1057), ('\u{1c84}', 1058), ('\u{1c85}', 1058), - ('\u{1c86}', 1066), ('\u{1c87}', 1122), ('\u{1c88}', 42570), ('\u{1c8a}', 7305), - ('\u{1d79}', 42877), ('\u{1d7d}', 11363), ('\u{1d8e}', 42950), ('\u{1e01}', 7680), - ('\u{1e03}', 7682), ('\u{1e05}', 7684), ('\u{1e07}', 7686), ('\u{1e09}', 7688), - ('\u{1e0b}', 7690), ('\u{1e0d}', 7692), ('\u{1e0f}', 7694), ('\u{1e11}', 7696), - ('\u{1e13}', 7698), ('\u{1e15}', 7700), ('\u{1e17}', 7702), ('\u{1e19}', 7704), - ('\u{1e1b}', 7706), ('\u{1e1d}', 7708), ('\u{1e1f}', 7710), ('\u{1e21}', 7712), - ('\u{1e23}', 7714), ('\u{1e25}', 7716), ('\u{1e27}', 7718), ('\u{1e29}', 7720), - ('\u{1e2b}', 7722), ('\u{1e2d}', 7724), ('\u{1e2f}', 7726), ('\u{1e31}', 7728), - ('\u{1e33}', 7730), ('\u{1e35}', 7732), ('\u{1e37}', 7734), ('\u{1e39}', 7736), - ('\u{1e3b}', 7738), ('\u{1e3d}', 7740), ('\u{1e3f}', 7742), ('\u{1e41}', 7744), - ('\u{1e43}', 7746), ('\u{1e45}', 7748), ('\u{1e47}', 7750), ('\u{1e49}', 7752), - ('\u{1e4b}', 7754), ('\u{1e4d}', 7756), ('\u{1e4f}', 7758), ('\u{1e51}', 7760), - ('\u{1e53}', 7762), ('\u{1e55}', 7764), ('\u{1e57}', 7766), ('\u{1e59}', 7768), - ('\u{1e5b}', 7770), ('\u{1e5d}', 7772), ('\u{1e5f}', 7774), ('\u{1e61}', 7776), - ('\u{1e63}', 7778), ('\u{1e65}', 7780), ('\u{1e67}', 7782), ('\u{1e69}', 7784), - ('\u{1e6b}', 7786), ('\u{1e6d}', 7788), ('\u{1e6f}', 7790), ('\u{1e71}', 7792), - ('\u{1e73}', 7794), ('\u{1e75}', 7796), ('\u{1e77}', 7798), ('\u{1e79}', 7800), - ('\u{1e7b}', 7802), ('\u{1e7d}', 7804), ('\u{1e7f}', 7806), ('\u{1e81}', 7808), - ('\u{1e83}', 7810), ('\u{1e85}', 7812), ('\u{1e87}', 7814), ('\u{1e89}', 7816), - ('\u{1e8b}', 7818), ('\u{1e8d}', 7820), ('\u{1e8f}', 7822), ('\u{1e91}', 7824), - ('\u{1e93}', 7826), ('\u{1e95}', 7828), ('\u{1e96}', 4194310), ('\u{1e97}', 4194311), - ('\u{1e98}', 4194312), ('\u{1e99}', 4194313), ('\u{1e9a}', 4194314), ('\u{1e9b}', 7776), - ('\u{1ea1}', 7840), ('\u{1ea3}', 7842), ('\u{1ea5}', 7844), ('\u{1ea7}', 7846), - ('\u{1ea9}', 7848), ('\u{1eab}', 7850), ('\u{1ead}', 7852), ('\u{1eaf}', 7854), - ('\u{1eb1}', 7856), ('\u{1eb3}', 7858), ('\u{1eb5}', 7860), ('\u{1eb7}', 7862), - ('\u{1eb9}', 7864), ('\u{1ebb}', 7866), ('\u{1ebd}', 7868), ('\u{1ebf}', 7870), - ('\u{1ec1}', 7872), ('\u{1ec3}', 7874), ('\u{1ec5}', 7876), ('\u{1ec7}', 7878), - ('\u{1ec9}', 7880), ('\u{1ecb}', 7882), ('\u{1ecd}', 7884), ('\u{1ecf}', 7886), - ('\u{1ed1}', 7888), ('\u{1ed3}', 7890), ('\u{1ed5}', 7892), ('\u{1ed7}', 7894), - ('\u{1ed9}', 7896), ('\u{1edb}', 7898), ('\u{1edd}', 7900), ('\u{1edf}', 7902), - ('\u{1ee1}', 7904), ('\u{1ee3}', 7906), ('\u{1ee5}', 7908), ('\u{1ee7}', 7910), - ('\u{1ee9}', 7912), ('\u{1eeb}', 7914), ('\u{1eed}', 7916), ('\u{1eef}', 7918), - ('\u{1ef1}', 7920), ('\u{1ef3}', 7922), 
('\u{1ef5}', 7924), ('\u{1ef7}', 7926), - ('\u{1ef9}', 7928), ('\u{1efb}', 7930), ('\u{1efd}', 7932), ('\u{1eff}', 7934), - ('\u{1f00}', 7944), ('\u{1f01}', 7945), ('\u{1f02}', 7946), ('\u{1f03}', 7947), - ('\u{1f04}', 7948), ('\u{1f05}', 7949), ('\u{1f06}', 7950), ('\u{1f07}', 7951), - ('\u{1f10}', 7960), ('\u{1f11}', 7961), ('\u{1f12}', 7962), ('\u{1f13}', 7963), - ('\u{1f14}', 7964), ('\u{1f15}', 7965), ('\u{1f20}', 7976), ('\u{1f21}', 7977), - ('\u{1f22}', 7978), ('\u{1f23}', 7979), ('\u{1f24}', 7980), ('\u{1f25}', 7981), - ('\u{1f26}', 7982), ('\u{1f27}', 7983), ('\u{1f30}', 7992), ('\u{1f31}', 7993), - ('\u{1f32}', 7994), ('\u{1f33}', 7995), ('\u{1f34}', 7996), ('\u{1f35}', 7997), - ('\u{1f36}', 7998), ('\u{1f37}', 7999), ('\u{1f40}', 8008), ('\u{1f41}', 8009), - ('\u{1f42}', 8010), ('\u{1f43}', 8011), ('\u{1f44}', 8012), ('\u{1f45}', 8013), - ('\u{1f50}', 4194315), ('\u{1f51}', 8025), ('\u{1f52}', 4194316), ('\u{1f53}', 8027), - ('\u{1f54}', 4194317), ('\u{1f55}', 8029), ('\u{1f56}', 4194318), ('\u{1f57}', 8031), - ('\u{1f60}', 8040), ('\u{1f61}', 8041), ('\u{1f62}', 8042), ('\u{1f63}', 8043), - ('\u{1f64}', 8044), ('\u{1f65}', 8045), ('\u{1f66}', 8046), ('\u{1f67}', 8047), - ('\u{1f70}', 8122), ('\u{1f71}', 8123), ('\u{1f72}', 8136), ('\u{1f73}', 8137), - ('\u{1f74}', 8138), ('\u{1f75}', 8139), ('\u{1f76}', 8154), ('\u{1f77}', 8155), - ('\u{1f78}', 8184), ('\u{1f79}', 8185), ('\u{1f7a}', 8170), ('\u{1f7b}', 8171), - ('\u{1f7c}', 8186), ('\u{1f7d}', 8187), ('\u{1f80}', 4194319), ('\u{1f81}', 4194320), - ('\u{1f82}', 4194321), ('\u{1f83}', 4194322), ('\u{1f84}', 4194323), ('\u{1f85}', 4194324), - ('\u{1f86}', 4194325), ('\u{1f87}', 4194326), ('\u{1f88}', 4194327), ('\u{1f89}', 4194328), - ('\u{1f8a}', 4194329), ('\u{1f8b}', 4194330), ('\u{1f8c}', 4194331), ('\u{1f8d}', 4194332), - ('\u{1f8e}', 4194333), ('\u{1f8f}', 4194334), ('\u{1f90}', 4194335), ('\u{1f91}', 4194336), - ('\u{1f92}', 4194337), ('\u{1f93}', 4194338), ('\u{1f94}', 4194339), ('\u{1f95}', 4194340), - ('\u{1f96}', 4194341), ('\u{1f97}', 4194342), ('\u{1f98}', 4194343), ('\u{1f99}', 4194344), - ('\u{1f9a}', 4194345), ('\u{1f9b}', 4194346), ('\u{1f9c}', 4194347), ('\u{1f9d}', 4194348), - ('\u{1f9e}', 4194349), ('\u{1f9f}', 4194350), ('\u{1fa0}', 4194351), ('\u{1fa1}', 4194352), - ('\u{1fa2}', 4194353), ('\u{1fa3}', 4194354), ('\u{1fa4}', 4194355), ('\u{1fa5}', 4194356), - ('\u{1fa6}', 4194357), ('\u{1fa7}', 4194358), ('\u{1fa8}', 4194359), ('\u{1fa9}', 4194360), - ('\u{1faa}', 4194361), ('\u{1fab}', 4194362), ('\u{1fac}', 4194363), ('\u{1fad}', 4194364), - ('\u{1fae}', 4194365), ('\u{1faf}', 4194366), ('\u{1fb0}', 8120), ('\u{1fb1}', 8121), - ('\u{1fb2}', 4194367), ('\u{1fb3}', 4194368), ('\u{1fb4}', 4194369), ('\u{1fb6}', 4194370), - ('\u{1fb7}', 4194371), ('\u{1fbc}', 4194372), ('\u{1fbe}', 921), ('\u{1fc2}', 4194373), - ('\u{1fc3}', 4194374), ('\u{1fc4}', 4194375), ('\u{1fc6}', 4194376), ('\u{1fc7}', 4194377), - ('\u{1fcc}', 4194378), ('\u{1fd0}', 8152), ('\u{1fd1}', 8153), ('\u{1fd2}', 4194379), - ('\u{1fd3}', 4194380), ('\u{1fd6}', 4194381), ('\u{1fd7}', 4194382), ('\u{1fe0}', 8168), - ('\u{1fe1}', 8169), ('\u{1fe2}', 4194383), ('\u{1fe3}', 4194384), ('\u{1fe4}', 4194385), - ('\u{1fe5}', 8172), ('\u{1fe6}', 4194386), ('\u{1fe7}', 4194387), ('\u{1ff2}', 4194388), - ('\u{1ff3}', 4194389), ('\u{1ff4}', 4194390), ('\u{1ff6}', 4194391), ('\u{1ff7}', 4194392), - ('\u{1ffc}', 4194393), ('\u{214e}', 8498), ('\u{2170}', 8544), ('\u{2171}', 8545), - ('\u{2172}', 8546), ('\u{2173}', 8547), ('\u{2174}', 8548), ('\u{2175}', 
8549), - ('\u{2176}', 8550), ('\u{2177}', 8551), ('\u{2178}', 8552), ('\u{2179}', 8553), - ('\u{217a}', 8554), ('\u{217b}', 8555), ('\u{217c}', 8556), ('\u{217d}', 8557), - ('\u{217e}', 8558), ('\u{217f}', 8559), ('\u{2184}', 8579), ('\u{24d0}', 9398), - ('\u{24d1}', 9399), ('\u{24d2}', 9400), ('\u{24d3}', 9401), ('\u{24d4}', 9402), - ('\u{24d5}', 9403), ('\u{24d6}', 9404), ('\u{24d7}', 9405), ('\u{24d8}', 9406), - ('\u{24d9}', 9407), ('\u{24da}', 9408), ('\u{24db}', 9409), ('\u{24dc}', 9410), - ('\u{24dd}', 9411), ('\u{24de}', 9412), ('\u{24df}', 9413), ('\u{24e0}', 9414), - ('\u{24e1}', 9415), ('\u{24e2}', 9416), ('\u{24e3}', 9417), ('\u{24e4}', 9418), - ('\u{24e5}', 9419), ('\u{24e6}', 9420), ('\u{24e7}', 9421), ('\u{24e8}', 9422), - ('\u{24e9}', 9423), ('\u{2c30}', 11264), ('\u{2c31}', 11265), ('\u{2c32}', 11266), - ('\u{2c33}', 11267), ('\u{2c34}', 11268), ('\u{2c35}', 11269), ('\u{2c36}', 11270), - ('\u{2c37}', 11271), ('\u{2c38}', 11272), ('\u{2c39}', 11273), ('\u{2c3a}', 11274), - ('\u{2c3b}', 11275), ('\u{2c3c}', 11276), ('\u{2c3d}', 11277), ('\u{2c3e}', 11278), - ('\u{2c3f}', 11279), ('\u{2c40}', 11280), ('\u{2c41}', 11281), ('\u{2c42}', 11282), - ('\u{2c43}', 11283), ('\u{2c44}', 11284), ('\u{2c45}', 11285), ('\u{2c46}', 11286), - ('\u{2c47}', 11287), ('\u{2c48}', 11288), ('\u{2c49}', 11289), ('\u{2c4a}', 11290), - ('\u{2c4b}', 11291), ('\u{2c4c}', 11292), ('\u{2c4d}', 11293), ('\u{2c4e}', 11294), - ('\u{2c4f}', 11295), ('\u{2c50}', 11296), ('\u{2c51}', 11297), ('\u{2c52}', 11298), - ('\u{2c53}', 11299), ('\u{2c54}', 11300), ('\u{2c55}', 11301), ('\u{2c56}', 11302), - ('\u{2c57}', 11303), ('\u{2c58}', 11304), ('\u{2c59}', 11305), ('\u{2c5a}', 11306), - ('\u{2c5b}', 11307), ('\u{2c5c}', 11308), ('\u{2c5d}', 11309), ('\u{2c5e}', 11310), - ('\u{2c5f}', 11311), ('\u{2c61}', 11360), ('\u{2c65}', 570), ('\u{2c66}', 574), - ('\u{2c68}', 11367), ('\u{2c6a}', 11369), ('\u{2c6c}', 11371), ('\u{2c73}', 11378), - ('\u{2c76}', 11381), ('\u{2c81}', 11392), ('\u{2c83}', 11394), ('\u{2c85}', 11396), - ('\u{2c87}', 11398), ('\u{2c89}', 11400), ('\u{2c8b}', 11402), ('\u{2c8d}', 11404), - ('\u{2c8f}', 11406), ('\u{2c91}', 11408), ('\u{2c93}', 11410), ('\u{2c95}', 11412), - ('\u{2c97}', 11414), ('\u{2c99}', 11416), ('\u{2c9b}', 11418), ('\u{2c9d}', 11420), - ('\u{2c9f}', 11422), ('\u{2ca1}', 11424), ('\u{2ca3}', 11426), ('\u{2ca5}', 11428), - ('\u{2ca7}', 11430), ('\u{2ca9}', 11432), ('\u{2cab}', 11434), ('\u{2cad}', 11436), - ('\u{2caf}', 11438), ('\u{2cb1}', 11440), ('\u{2cb3}', 11442), ('\u{2cb5}', 11444), - ('\u{2cb7}', 11446), ('\u{2cb9}', 11448), ('\u{2cbb}', 11450), ('\u{2cbd}', 11452), - ('\u{2cbf}', 11454), ('\u{2cc1}', 11456), ('\u{2cc3}', 11458), ('\u{2cc5}', 11460), - ('\u{2cc7}', 11462), ('\u{2cc9}', 11464), ('\u{2ccb}', 11466), ('\u{2ccd}', 11468), - ('\u{2ccf}', 11470), ('\u{2cd1}', 11472), ('\u{2cd3}', 11474), ('\u{2cd5}', 11476), - ('\u{2cd7}', 11478), ('\u{2cd9}', 11480), ('\u{2cdb}', 11482), ('\u{2cdd}', 11484), - ('\u{2cdf}', 11486), ('\u{2ce1}', 11488), ('\u{2ce3}', 11490), ('\u{2cec}', 11499), - ('\u{2cee}', 11501), ('\u{2cf3}', 11506), ('\u{2d00}', 4256), ('\u{2d01}', 4257), - ('\u{2d02}', 4258), ('\u{2d03}', 4259), ('\u{2d04}', 4260), ('\u{2d05}', 4261), - ('\u{2d06}', 4262), ('\u{2d07}', 4263), ('\u{2d08}', 4264), ('\u{2d09}', 4265), - ('\u{2d0a}', 4266), ('\u{2d0b}', 4267), ('\u{2d0c}', 4268), ('\u{2d0d}', 4269), - ('\u{2d0e}', 4270), ('\u{2d0f}', 4271), ('\u{2d10}', 4272), ('\u{2d11}', 4273), - ('\u{2d12}', 4274), ('\u{2d13}', 4275), ('\u{2d14}', 4276), ('\u{2d15}', 
4277), - ('\u{2d16}', 4278), ('\u{2d17}', 4279), ('\u{2d18}', 4280), ('\u{2d19}', 4281), - ('\u{2d1a}', 4282), ('\u{2d1b}', 4283), ('\u{2d1c}', 4284), ('\u{2d1d}', 4285), - ('\u{2d1e}', 4286), ('\u{2d1f}', 4287), ('\u{2d20}', 4288), ('\u{2d21}', 4289), - ('\u{2d22}', 4290), ('\u{2d23}', 4291), ('\u{2d24}', 4292), ('\u{2d25}', 4293), - ('\u{2d27}', 4295), ('\u{2d2d}', 4301), ('\u{a641}', 42560), ('\u{a643}', 42562), - ('\u{a645}', 42564), ('\u{a647}', 42566), ('\u{a649}', 42568), ('\u{a64b}', 42570), - ('\u{a64d}', 42572), ('\u{a64f}', 42574), ('\u{a651}', 42576), ('\u{a653}', 42578), - ('\u{a655}', 42580), ('\u{a657}', 42582), ('\u{a659}', 42584), ('\u{a65b}', 42586), - ('\u{a65d}', 42588), ('\u{a65f}', 42590), ('\u{a661}', 42592), ('\u{a663}', 42594), - ('\u{a665}', 42596), ('\u{a667}', 42598), ('\u{a669}', 42600), ('\u{a66b}', 42602), - ('\u{a66d}', 42604), ('\u{a681}', 42624), ('\u{a683}', 42626), ('\u{a685}', 42628), - ('\u{a687}', 42630), ('\u{a689}', 42632), ('\u{a68b}', 42634), ('\u{a68d}', 42636), - ('\u{a68f}', 42638), ('\u{a691}', 42640), ('\u{a693}', 42642), ('\u{a695}', 42644), - ('\u{a697}', 42646), ('\u{a699}', 42648), ('\u{a69b}', 42650), ('\u{a723}', 42786), - ('\u{a725}', 42788), ('\u{a727}', 42790), ('\u{a729}', 42792), ('\u{a72b}', 42794), - ('\u{a72d}', 42796), ('\u{a72f}', 42798), ('\u{a733}', 42802), ('\u{a735}', 42804), - ('\u{a737}', 42806), ('\u{a739}', 42808), ('\u{a73b}', 42810), ('\u{a73d}', 42812), - ('\u{a73f}', 42814), ('\u{a741}', 42816), ('\u{a743}', 42818), ('\u{a745}', 42820), - ('\u{a747}', 42822), ('\u{a749}', 42824), ('\u{a74b}', 42826), ('\u{a74d}', 42828), - ('\u{a74f}', 42830), ('\u{a751}', 42832), ('\u{a753}', 42834), ('\u{a755}', 42836), - ('\u{a757}', 42838), ('\u{a759}', 42840), ('\u{a75b}', 42842), ('\u{a75d}', 42844), - ('\u{a75f}', 42846), ('\u{a761}', 42848), ('\u{a763}', 42850), ('\u{a765}', 42852), - ('\u{a767}', 42854), ('\u{a769}', 42856), ('\u{a76b}', 42858), ('\u{a76d}', 42860), - ('\u{a76f}', 42862), ('\u{a77a}', 42873), ('\u{a77c}', 42875), ('\u{a77f}', 42878), - ('\u{a781}', 42880), ('\u{a783}', 42882), ('\u{a785}', 42884), ('\u{a787}', 42886), - ('\u{a78c}', 42891), ('\u{a791}', 42896), ('\u{a793}', 42898), ('\u{a794}', 42948), - ('\u{a797}', 42902), ('\u{a799}', 42904), ('\u{a79b}', 42906), ('\u{a79d}', 42908), - ('\u{a79f}', 42910), ('\u{a7a1}', 42912), ('\u{a7a3}', 42914), ('\u{a7a5}', 42916), - ('\u{a7a7}', 42918), ('\u{a7a9}', 42920), ('\u{a7b5}', 42932), ('\u{a7b7}', 42934), - ('\u{a7b9}', 42936), ('\u{a7bb}', 42938), ('\u{a7bd}', 42940), ('\u{a7bf}', 42942), - ('\u{a7c1}', 42944), ('\u{a7c3}', 42946), ('\u{a7c8}', 42951), ('\u{a7ca}', 42953), - ('\u{a7cd}', 42956), ('\u{a7cf}', 42958), ('\u{a7d1}', 42960), ('\u{a7d3}', 42962), - ('\u{a7d5}', 42964), ('\u{a7d7}', 42966), ('\u{a7d9}', 42968), ('\u{a7db}', 42970), - ('\u{a7f6}', 42997), ('\u{ab53}', 42931), ('\u{ab70}', 5024), ('\u{ab71}', 5025), - ('\u{ab72}', 5026), ('\u{ab73}', 5027), ('\u{ab74}', 5028), ('\u{ab75}', 5029), - ('\u{ab76}', 5030), ('\u{ab77}', 5031), ('\u{ab78}', 5032), ('\u{ab79}', 5033), - ('\u{ab7a}', 5034), ('\u{ab7b}', 5035), ('\u{ab7c}', 5036), ('\u{ab7d}', 5037), - ('\u{ab7e}', 5038), ('\u{ab7f}', 5039), ('\u{ab80}', 5040), ('\u{ab81}', 5041), - ('\u{ab82}', 5042), ('\u{ab83}', 5043), ('\u{ab84}', 5044), ('\u{ab85}', 5045), - ('\u{ab86}', 5046), ('\u{ab87}', 5047), ('\u{ab88}', 5048), ('\u{ab89}', 5049), - ('\u{ab8a}', 5050), ('\u{ab8b}', 5051), ('\u{ab8c}', 5052), ('\u{ab8d}', 5053), - ('\u{ab8e}', 5054), ('\u{ab8f}', 5055), ('\u{ab90}', 5056), 
('\u{ab91}', 5057), - ('\u{ab92}', 5058), ('\u{ab93}', 5059), ('\u{ab94}', 5060), ('\u{ab95}', 5061), - ('\u{ab96}', 5062), ('\u{ab97}', 5063), ('\u{ab98}', 5064), ('\u{ab99}', 5065), - ('\u{ab9a}', 5066), ('\u{ab9b}', 5067), ('\u{ab9c}', 5068), ('\u{ab9d}', 5069), - ('\u{ab9e}', 5070), ('\u{ab9f}', 5071), ('\u{aba0}', 5072), ('\u{aba1}', 5073), - ('\u{aba2}', 5074), ('\u{aba3}', 5075), ('\u{aba4}', 5076), ('\u{aba5}', 5077), - ('\u{aba6}', 5078), ('\u{aba7}', 5079), ('\u{aba8}', 5080), ('\u{aba9}', 5081), - ('\u{abaa}', 5082), ('\u{abab}', 5083), ('\u{abac}', 5084), ('\u{abad}', 5085), - ('\u{abae}', 5086), ('\u{abaf}', 5087), ('\u{abb0}', 5088), ('\u{abb1}', 5089), - ('\u{abb2}', 5090), ('\u{abb3}', 5091), ('\u{abb4}', 5092), ('\u{abb5}', 5093), - ('\u{abb6}', 5094), ('\u{abb7}', 5095), ('\u{abb8}', 5096), ('\u{abb9}', 5097), - ('\u{abba}', 5098), ('\u{abbb}', 5099), ('\u{abbc}', 5100), ('\u{abbd}', 5101), - ('\u{abbe}', 5102), ('\u{abbf}', 5103), ('\u{fb00}', 4194394), ('\u{fb01}', 4194395), - ('\u{fb02}', 4194396), ('\u{fb03}', 4194397), ('\u{fb04}', 4194398), ('\u{fb05}', 4194399), - ('\u{fb06}', 4194400), ('\u{fb13}', 4194401), ('\u{fb14}', 4194402), ('\u{fb15}', 4194403), - ('\u{fb16}', 4194404), ('\u{fb17}', 4194405), ('\u{ff41}', 65313), ('\u{ff42}', 65314), - ('\u{ff43}', 65315), ('\u{ff44}', 65316), ('\u{ff45}', 65317), ('\u{ff46}', 65318), - ('\u{ff47}', 65319), ('\u{ff48}', 65320), ('\u{ff49}', 65321), ('\u{ff4a}', 65322), - ('\u{ff4b}', 65323), ('\u{ff4c}', 65324), ('\u{ff4d}', 65325), ('\u{ff4e}', 65326), - ('\u{ff4f}', 65327), ('\u{ff50}', 65328), ('\u{ff51}', 65329), ('\u{ff52}', 65330), - ('\u{ff53}', 65331), ('\u{ff54}', 65332), ('\u{ff55}', 65333), ('\u{ff56}', 65334), - ('\u{ff57}', 65335), ('\u{ff58}', 65336), ('\u{ff59}', 65337), ('\u{ff5a}', 65338), - ('\u{10428}', 66560), ('\u{10429}', 66561), ('\u{1042a}', 66562), ('\u{1042b}', 66563), - ('\u{1042c}', 66564), ('\u{1042d}', 66565), ('\u{1042e}', 66566), ('\u{1042f}', 66567), - ('\u{10430}', 66568), ('\u{10431}', 66569), ('\u{10432}', 66570), ('\u{10433}', 66571), - ('\u{10434}', 66572), ('\u{10435}', 66573), ('\u{10436}', 66574), ('\u{10437}', 66575), - ('\u{10438}', 66576), ('\u{10439}', 66577), ('\u{1043a}', 66578), ('\u{1043b}', 66579), - ('\u{1043c}', 66580), ('\u{1043d}', 66581), ('\u{1043e}', 66582), ('\u{1043f}', 66583), - ('\u{10440}', 66584), ('\u{10441}', 66585), ('\u{10442}', 66586), ('\u{10443}', 66587), - ('\u{10444}', 66588), ('\u{10445}', 66589), ('\u{10446}', 66590), ('\u{10447}', 66591), - ('\u{10448}', 66592), ('\u{10449}', 66593), ('\u{1044a}', 66594), ('\u{1044b}', 66595), - ('\u{1044c}', 66596), ('\u{1044d}', 66597), ('\u{1044e}', 66598), ('\u{1044f}', 66599), - ('\u{104d8}', 66736), ('\u{104d9}', 66737), ('\u{104da}', 66738), ('\u{104db}', 66739), - ('\u{104dc}', 66740), ('\u{104dd}', 66741), ('\u{104de}', 66742), ('\u{104df}', 66743), - ('\u{104e0}', 66744), ('\u{104e1}', 66745), ('\u{104e2}', 66746), ('\u{104e3}', 66747), - ('\u{104e4}', 66748), ('\u{104e5}', 66749), ('\u{104e6}', 66750), ('\u{104e7}', 66751), - ('\u{104e8}', 66752), ('\u{104e9}', 66753), ('\u{104ea}', 66754), ('\u{104eb}', 66755), - ('\u{104ec}', 66756), ('\u{104ed}', 66757), ('\u{104ee}', 66758), ('\u{104ef}', 66759), - ('\u{104f0}', 66760), ('\u{104f1}', 66761), ('\u{104f2}', 66762), ('\u{104f3}', 66763), - ('\u{104f4}', 66764), ('\u{104f5}', 66765), ('\u{104f6}', 66766), ('\u{104f7}', 66767), - ('\u{104f8}', 66768), ('\u{104f9}', 66769), ('\u{104fa}', 66770), ('\u{104fb}', 66771), - ('\u{10597}', 66928), ('\u{10598}', 
66929), ('\u{10599}', 66930), ('\u{1059a}', 66931), - ('\u{1059b}', 66932), ('\u{1059c}', 66933), ('\u{1059d}', 66934), ('\u{1059e}', 66935), - ('\u{1059f}', 66936), ('\u{105a0}', 66937), ('\u{105a1}', 66938), ('\u{105a3}', 66940), - ('\u{105a4}', 66941), ('\u{105a5}', 66942), ('\u{105a6}', 66943), ('\u{105a7}', 66944), - ('\u{105a8}', 66945), ('\u{105a9}', 66946), ('\u{105aa}', 66947), ('\u{105ab}', 66948), - ('\u{105ac}', 66949), ('\u{105ad}', 66950), ('\u{105ae}', 66951), ('\u{105af}', 66952), - ('\u{105b0}', 66953), ('\u{105b1}', 66954), ('\u{105b3}', 66956), ('\u{105b4}', 66957), - ('\u{105b5}', 66958), ('\u{105b6}', 66959), ('\u{105b7}', 66960), ('\u{105b8}', 66961), - ('\u{105b9}', 66962), ('\u{105bb}', 66964), ('\u{105bc}', 66965), ('\u{10cc0}', 68736), - ('\u{10cc1}', 68737), ('\u{10cc2}', 68738), ('\u{10cc3}', 68739), ('\u{10cc4}', 68740), - ('\u{10cc5}', 68741), ('\u{10cc6}', 68742), ('\u{10cc7}', 68743), ('\u{10cc8}', 68744), - ('\u{10cc9}', 68745), ('\u{10cca}', 68746), ('\u{10ccb}', 68747), ('\u{10ccc}', 68748), - ('\u{10ccd}', 68749), ('\u{10cce}', 68750), ('\u{10ccf}', 68751), ('\u{10cd0}', 68752), - ('\u{10cd1}', 68753), ('\u{10cd2}', 68754), ('\u{10cd3}', 68755), ('\u{10cd4}', 68756), - ('\u{10cd5}', 68757), ('\u{10cd6}', 68758), ('\u{10cd7}', 68759), ('\u{10cd8}', 68760), - ('\u{10cd9}', 68761), ('\u{10cda}', 68762), ('\u{10cdb}', 68763), ('\u{10cdc}', 68764), - ('\u{10cdd}', 68765), ('\u{10cde}', 68766), ('\u{10cdf}', 68767), ('\u{10ce0}', 68768), - ('\u{10ce1}', 68769), ('\u{10ce2}', 68770), ('\u{10ce3}', 68771), ('\u{10ce4}', 68772), - ('\u{10ce5}', 68773), ('\u{10ce6}', 68774), ('\u{10ce7}', 68775), ('\u{10ce8}', 68776), - ('\u{10ce9}', 68777), ('\u{10cea}', 68778), ('\u{10ceb}', 68779), ('\u{10cec}', 68780), - ('\u{10ced}', 68781), ('\u{10cee}', 68782), ('\u{10cef}', 68783), ('\u{10cf0}', 68784), - ('\u{10cf1}', 68785), ('\u{10cf2}', 68786), ('\u{10d70}', 68944), ('\u{10d71}', 68945), - ('\u{10d72}', 68946), ('\u{10d73}', 68947), ('\u{10d74}', 68948), ('\u{10d75}', 68949), - ('\u{10d76}', 68950), ('\u{10d77}', 68951), ('\u{10d78}', 68952), ('\u{10d79}', 68953), - ('\u{10d7a}', 68954), ('\u{10d7b}', 68955), ('\u{10d7c}', 68956), ('\u{10d7d}', 68957), - ('\u{10d7e}', 68958), ('\u{10d7f}', 68959), ('\u{10d80}', 68960), ('\u{10d81}', 68961), - ('\u{10d82}', 68962), ('\u{10d83}', 68963), ('\u{10d84}', 68964), ('\u{10d85}', 68965), - ('\u{118c0}', 71840), ('\u{118c1}', 71841), ('\u{118c2}', 71842), ('\u{118c3}', 71843), - ('\u{118c4}', 71844), ('\u{118c5}', 71845), ('\u{118c6}', 71846), ('\u{118c7}', 71847), - ('\u{118c8}', 71848), ('\u{118c9}', 71849), ('\u{118ca}', 71850), ('\u{118cb}', 71851), - ('\u{118cc}', 71852), ('\u{118cd}', 71853), ('\u{118ce}', 71854), ('\u{118cf}', 71855), - ('\u{118d0}', 71856), ('\u{118d1}', 71857), ('\u{118d2}', 71858), ('\u{118d3}', 71859), - ('\u{118d4}', 71860), ('\u{118d5}', 71861), ('\u{118d6}', 71862), ('\u{118d7}', 71863), - ('\u{118d8}', 71864), ('\u{118d9}', 71865), ('\u{118da}', 71866), ('\u{118db}', 71867), - ('\u{118dc}', 71868), ('\u{118dd}', 71869), ('\u{118de}', 71870), ('\u{118df}', 71871), - ('\u{16e60}', 93760), ('\u{16e61}', 93761), ('\u{16e62}', 93762), ('\u{16e63}', 93763), - ('\u{16e64}', 93764), ('\u{16e65}', 93765), ('\u{16e66}', 93766), ('\u{16e67}', 93767), - ('\u{16e68}', 93768), ('\u{16e69}', 93769), ('\u{16e6a}', 93770), ('\u{16e6b}', 93771), - ('\u{16e6c}', 93772), ('\u{16e6d}', 93773), ('\u{16e6e}', 93774), ('\u{16e6f}', 93775), - ('\u{16e70}', 93776), ('\u{16e71}', 93777), ('\u{16e72}', 93778), ('\u{16e73}', 
93779), - ('\u{16e74}', 93780), ('\u{16e75}', 93781), ('\u{16e76}', 93782), ('\u{16e77}', 93783), - ('\u{16e78}', 93784), ('\u{16e79}', 93785), ('\u{16e7a}', 93786), ('\u{16e7b}', 93787), - ('\u{16e7c}', 93788), ('\u{16e7d}', 93789), ('\u{16e7e}', 93790), ('\u{16e7f}', 93791), - ('\u{16ebb}', 93856), ('\u{16ebc}', 93857), ('\u{16ebd}', 93858), ('\u{16ebe}', 93859), - ('\u{16ebf}', 93860), ('\u{16ec0}', 93861), ('\u{16ec1}', 93862), ('\u{16ec2}', 93863), - ('\u{16ec3}', 93864), ('\u{16ec4}', 93865), ('\u{16ec5}', 93866), ('\u{16ec6}', 93867), - ('\u{16ec7}', 93868), ('\u{16ec8}', 93869), ('\u{16ec9}', 93870), ('\u{16eca}', 93871), - ('\u{16ecb}', 93872), ('\u{16ecc}', 93873), ('\u{16ecd}', 93874), ('\u{16ece}', 93875), - ('\u{16ecf}', 93876), ('\u{16ed0}', 93877), ('\u{16ed1}', 93878), ('\u{16ed2}', 93879), - ('\u{16ed3}', 93880), ('\u{1e922}', 125184), ('\u{1e923}', 125185), ('\u{1e924}', 125186), - ('\u{1e925}', 125187), ('\u{1e926}', 125188), ('\u{1e927}', 125189), ('\u{1e928}', 125190), - ('\u{1e929}', 125191), ('\u{1e92a}', 125192), ('\u{1e92b}', 125193), ('\u{1e92c}', 125194), - ('\u{1e92d}', 125195), ('\u{1e92e}', 125196), ('\u{1e92f}', 125197), ('\u{1e930}', 125198), - ('\u{1e931}', 125199), ('\u{1e932}', 125200), ('\u{1e933}', 125201), ('\u{1e934}', 125202), - ('\u{1e935}', 125203), ('\u{1e936}', 125204), ('\u{1e937}', 125205), ('\u{1e938}', 125206), - ('\u{1e939}', 125207), ('\u{1e93a}', 125208), ('\u{1e93b}', 125209), ('\u{1e93c}', 125210), - ('\u{1e93d}', 125211), ('\u{1e93e}', 125212), ('\u{1e93f}', 125213), ('\u{1e940}', 125214), - ('\u{1e941}', 125215), ('\u{1e942}', 125216), ('\u{1e943}', 125217), - ]; - - #[rustfmt::skip] - static UPPERCASE_TABLE_MULTI: &[[char; 3]; 102] = &[ - ['S', 'S', '\u{0}'], ['\u{2bc}', 'N', '\u{0}'], ['J', '\u{30c}', '\u{0}'], - ['\u{399}', '\u{308}', '\u{301}'], ['\u{3a5}', '\u{308}', '\u{301}'], - ['\u{535}', '\u{552}', '\u{0}'], ['H', '\u{331}', '\u{0}'], ['T', '\u{308}', '\u{0}'], - ['W', '\u{30a}', '\u{0}'], ['Y', '\u{30a}', '\u{0}'], ['A', '\u{2be}', '\u{0}'], - ['\u{3a5}', '\u{313}', '\u{0}'], ['\u{3a5}', '\u{313}', '\u{300}'], - ['\u{3a5}', '\u{313}', '\u{301}'], ['\u{3a5}', '\u{313}', '\u{342}'], - ['\u{1f08}', '\u{399}', '\u{0}'], ['\u{1f09}', '\u{399}', '\u{0}'], - ['\u{1f0a}', '\u{399}', '\u{0}'], ['\u{1f0b}', '\u{399}', '\u{0}'], - ['\u{1f0c}', '\u{399}', '\u{0}'], ['\u{1f0d}', '\u{399}', '\u{0}'], - ['\u{1f0e}', '\u{399}', '\u{0}'], ['\u{1f0f}', '\u{399}', '\u{0}'], - ['\u{1f08}', '\u{399}', '\u{0}'], ['\u{1f09}', '\u{399}', '\u{0}'], - ['\u{1f0a}', '\u{399}', '\u{0}'], ['\u{1f0b}', '\u{399}', '\u{0}'], - ['\u{1f0c}', '\u{399}', '\u{0}'], ['\u{1f0d}', '\u{399}', '\u{0}'], - ['\u{1f0e}', '\u{399}', '\u{0}'], ['\u{1f0f}', '\u{399}', '\u{0}'], - ['\u{1f28}', '\u{399}', '\u{0}'], ['\u{1f29}', '\u{399}', '\u{0}'], - ['\u{1f2a}', '\u{399}', '\u{0}'], ['\u{1f2b}', '\u{399}', '\u{0}'], - ['\u{1f2c}', '\u{399}', '\u{0}'], ['\u{1f2d}', '\u{399}', '\u{0}'], - ['\u{1f2e}', '\u{399}', '\u{0}'], ['\u{1f2f}', '\u{399}', '\u{0}'], - ['\u{1f28}', '\u{399}', '\u{0}'], ['\u{1f29}', '\u{399}', '\u{0}'], - ['\u{1f2a}', '\u{399}', '\u{0}'], ['\u{1f2b}', '\u{399}', '\u{0}'], - ['\u{1f2c}', '\u{399}', '\u{0}'], ['\u{1f2d}', '\u{399}', '\u{0}'], - ['\u{1f2e}', '\u{399}', '\u{0}'], ['\u{1f2f}', '\u{399}', '\u{0}'], - ['\u{1f68}', '\u{399}', '\u{0}'], ['\u{1f69}', '\u{399}', '\u{0}'], - ['\u{1f6a}', '\u{399}', '\u{0}'], ['\u{1f6b}', '\u{399}', '\u{0}'], - ['\u{1f6c}', '\u{399}', '\u{0}'], ['\u{1f6d}', '\u{399}', '\u{0}'], - ['\u{1f6e}', '\u{399}', '\u{0}'], 
['\u{1f6f}', '\u{399}', '\u{0}'], - ['\u{1f68}', '\u{399}', '\u{0}'], ['\u{1f69}', '\u{399}', '\u{0}'], - ['\u{1f6a}', '\u{399}', '\u{0}'], ['\u{1f6b}', '\u{399}', '\u{0}'], - ['\u{1f6c}', '\u{399}', '\u{0}'], ['\u{1f6d}', '\u{399}', '\u{0}'], - ['\u{1f6e}', '\u{399}', '\u{0}'], ['\u{1f6f}', '\u{399}', '\u{0}'], - ['\u{1fba}', '\u{399}', '\u{0}'], ['\u{391}', '\u{399}', '\u{0}'], - ['\u{386}', '\u{399}', '\u{0}'], ['\u{391}', '\u{342}', '\u{0}'], - ['\u{391}', '\u{342}', '\u{399}'], ['\u{391}', '\u{399}', '\u{0}'], - ['\u{1fca}', '\u{399}', '\u{0}'], ['\u{397}', '\u{399}', '\u{0}'], - ['\u{389}', '\u{399}', '\u{0}'], ['\u{397}', '\u{342}', '\u{0}'], - ['\u{397}', '\u{342}', '\u{399}'], ['\u{397}', '\u{399}', '\u{0}'], - ['\u{399}', '\u{308}', '\u{300}'], ['\u{399}', '\u{308}', '\u{301}'], - ['\u{399}', '\u{342}', '\u{0}'], ['\u{399}', '\u{308}', '\u{342}'], - ['\u{3a5}', '\u{308}', '\u{300}'], ['\u{3a5}', '\u{308}', '\u{301}'], - ['\u{3a1}', '\u{313}', '\u{0}'], ['\u{3a5}', '\u{342}', '\u{0}'], - ['\u{3a5}', '\u{308}', '\u{342}'], ['\u{1ffa}', '\u{399}', '\u{0}'], - ['\u{3a9}', '\u{399}', '\u{0}'], ['\u{38f}', '\u{399}', '\u{0}'], - ['\u{3a9}', '\u{342}', '\u{0}'], ['\u{3a9}', '\u{342}', '\u{399}'], - ['\u{3a9}', '\u{399}', '\u{0}'], ['F', 'F', '\u{0}'], ['F', 'I', '\u{0}'], - ['F', 'L', '\u{0}'], ['F', 'F', 'I'], ['F', 'F', 'L'], ['S', 'T', '\u{0}'], - ['S', 'T', '\u{0}'], ['\u{544}', '\u{546}', '\u{0}'], ['\u{544}', '\u{535}', '\u{0}'], - ['\u{544}', '\u{53b}', '\u{0}'], ['\u{54e}', '\u{546}', '\u{0}'], - ['\u{544}', '\u{53d}', '\u{0}'], - ]; } From b3fcc06d3d882546ae0d30b65f6b20e7921f2095 Mon Sep 17 00:00:00 2001 From: Karl Meakin Date: Mon, 20 Oct 2025 00:17:09 +0100 Subject: [PATCH 247/358] refactor: make `unicode_data` tests normal tests Instead of generating a standalone executable to test `unicode_data`, generate normal tests in `coretests`. This ensures tests are always generated, and will be run as part of the normal testsuite. Also change the generated tests to loop over lookup tables, rather than generating a separate `assert_eq!()` statement for every codepoint. The old approach produced a massive (20,000 lines plus) file which took minutes to compile! --- core/src/unicode/mod.rs | 2 +- coretests/tests/lib.rs | 1 + coretests/tests/unicode.rs | 96 + coretests/tests/unicode/test_data.rs | 2928 ++++++++++++++++++++++++++ 4 files changed, 3026 insertions(+), 1 deletion(-) create mode 100644 coretests/tests/unicode/test_data.rs diff --git a/core/src/unicode/mod.rs b/core/src/unicode/mod.rs index 9bc4136517fae..1ee97d64d01bc 100644 --- a/core/src/unicode/mod.rs +++ b/core/src/unicode/mod.rs @@ -20,7 +20,7 @@ pub(crate) mod printable; mod rt; #[allow(unreachable_pub)] -mod unicode_data; +pub mod unicode_data; /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of /// `char` and `str` methods are based on. 
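The table-driven strategy the commit message describes is easy to see in isolation. The sketch below is illustrative only, not part of the patch: `check_boolean_property`, `digits`, and the ASCII-digit property are made-up stand-ins. It mirrors the `test_boolean_property` helper added to coretests/tests/unicode.rs further down: walk a sorted table of inclusive ranges, asserting that the lookup function returns `true` inside every range and `false` in every gap. The real helper starts scanning at U+0080 rather than U+0000, presumably because ASCII is covered by a separate fast path in `unicode_data`.

```
use std::ops::RangeInclusive;

// Checks `lookup` against `ranges` over all of `char`: the property must
// hold inside every range and fail everywhere else.
fn check_boolean_property(ranges: &[RangeInclusive<char>], lookup: fn(char) -> bool) {
    let mut start = '\0';
    for range in ranges {
        // The gap before this range must not have the property...
        for c in start..*range.start() {
            assert!(!lookup(c), "{c:?}");
        }
        // ...and every codepoint inside the range must have it.
        for c in range.clone() {
            assert!(lookup(c), "{c:?}");
        }
        // Assumes no range ends just below the surrogate block or at
        // char::MAX, so the successor is always a valid `char`.
        start = char::from_u32(*range.end() as u32 + 1).unwrap();
    }
    // Everything after the last range must not have the property.
    for c in start..=char::MAX {
        assert!(!lookup(c), "{c:?}");
    }
}

fn main() {
    // Toy property table: ASCII digits as a sorted list of inclusive ranges.
    let digits: &[RangeInclusive<char>] = &['0'..='9'];
    check_boolean_property(digits, |c| c.is_ascii_digit());
    println!("table agrees with char::is_ascii_digit everywhere");
}
```

Because the loop materializes one `char` at a time, a single small helper covers all 1,112,064 scalar values quickly at test time, which is what replaces the old generated file of per-codepoint `assert_eq!` calls that took minutes to compile.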
diff --git a/coretests/tests/lib.rs b/coretests/tests/lib.rs
index c2dc3a99ab109..2f00197f82bb3 100644
--- a/coretests/tests/lib.rs
+++ b/coretests/tests/lib.rs
@@ -111,6 +111,7 @@
 #![feature(try_find)]
 #![feature(try_trait_v2)]
 #![feature(uint_bit_width)]
+#![feature(unicode_internals)]
 #![feature(unsize)]
 #![feature(unwrap_infallible)]
 // tidy-alphabetical-end
diff --git a/coretests/tests/unicode.rs b/coretests/tests/unicode.rs
index bbace0ef66ca3..445175c685784 100644
--- a/coretests/tests/unicode.rs
+++ b/coretests/tests/unicode.rs
@@ -1,5 +1,101 @@
+use core::unicode::unicode_data;
+use std::ops::RangeInclusive;
+
+mod test_data;
+
 #[test]
 pub fn version() {
     let (major, _minor, _update) = core::char::UNICODE_VERSION;
     assert!(major >= 10);
 }
+
+#[track_caller]
+fn test_boolean_property(ranges: &[RangeInclusive<char>], lookup: fn(char) -> bool) {
+    let mut start = '\u{80}';
+    for range in ranges {
+        for c in start..*range.start() {
+            assert!(!lookup(c), "{c:?}");
+        }
+        for c in range.clone() {
+            assert!(lookup(c), "{c:?}");
+        }
+        start = char::from_u32(*range.end() as u32 + 1).unwrap();
+    }
+    for c in start..=char::MAX {
+        assert!(!lookup(c), "{c:?}");
+    }
+}
+
+#[track_caller]
+fn test_case_mapping(ranges: &[(char, [char; 3])], lookup: fn(char) -> [char; 3]) {
+    let mut start = '\u{80}';
+    for &(key, val) in ranges {
+        for c in start..key {
+            assert_eq!(lookup(c), [c, '\0', '\0'], "{c:?}");
+        }
+        assert_eq!(lookup(key), val, "{key:?}");
+        start = char::from_u32(key as u32 + 1).unwrap();
+    }
+    for c in start..=char::MAX {
+        assert_eq!(lookup(c), [c, '\0', '\0'], "{c:?}");
+    }
+}
+
+#[test]
+#[cfg_attr(miri, ignore)]
+fn alphabetic() {
+    test_boolean_property(test_data::ALPHABETIC, unicode_data::alphabetic::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)]
+fn case_ignorable() {
+    test_boolean_property(test_data::CASE_IGNORABLE, unicode_data::case_ignorable::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)]
+fn cased() {
+    test_boolean_property(test_data::CASED, unicode_data::cased::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)]
+fn grapheme_extend() {
+    test_boolean_property(test_data::GRAPHEME_EXTEND, unicode_data::grapheme_extend::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)]
+fn lowercase() {
+    test_boolean_property(test_data::LOWERCASE, unicode_data::lowercase::lookup);
+}
+
+#[test]
+fn n() {
+    test_boolean_property(test_data::N, unicode_data::n::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)]
+fn uppercase() {
+    test_boolean_property(test_data::UPPERCASE, unicode_data::uppercase::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)]
+fn white_space() {
+    test_boolean_property(test_data::WHITE_SPACE, unicode_data::white_space::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)]
+fn to_lowercase() {
+    test_case_mapping(test_data::TO_LOWER, unicode_data::conversions::to_lower);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)]
+fn to_uppercase() {
+    test_case_mapping(test_data::TO_UPPER, unicode_data::conversions::to_upper);
+}
diff --git a/coretests/tests/unicode/test_data.rs b/coretests/tests/unicode/test_data.rs
new file mode 100644
index 0000000000000..f53cd7dc22705
--- /dev/null
+++ b/coretests/tests/unicode/test_data.rs
@@ -0,0 +1,2928 @@
+//! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!
+// ignore-tidy-filelength + +use std::ops::RangeInclusive; + +#[rustfmt::skip] +pub(super) static ALPHABETIC: &[RangeInclusive<char>; 759] = &[ + '\u{aa}'..='\u{aa}', '\u{b5}'..='\u{b5}', '\u{ba}'..='\u{ba}', '\u{c0}'..='\u{d6}', + '\u{d8}'..='\u{f6}', '\u{f8}'..='\u{2c1}', '\u{2c6}'..='\u{2d1}', '\u{2e0}'..='\u{2e4}', + '\u{2ec}'..='\u{2ec}', '\u{2ee}'..='\u{2ee}', '\u{345}'..='\u{345}', '\u{363}'..='\u{374}', + '\u{376}'..='\u{377}', '\u{37a}'..='\u{37d}', '\u{37f}'..='\u{37f}', '\u{386}'..='\u{386}', + '\u{388}'..='\u{38a}', '\u{38c}'..='\u{38c}', '\u{38e}'..='\u{3a1}', '\u{3a3}'..='\u{3f5}', + '\u{3f7}'..='\u{481}', '\u{48a}'..='\u{52f}', '\u{531}'..='\u{556}', '\u{559}'..='\u{559}', + '\u{560}'..='\u{588}', '\u{5b0}'..='\u{5bd}', '\u{5bf}'..='\u{5bf}', '\u{5c1}'..='\u{5c2}', + '\u{5c4}'..='\u{5c5}', '\u{5c7}'..='\u{5c7}', '\u{5d0}'..='\u{5ea}', '\u{5ef}'..='\u{5f2}', + '\u{610}'..='\u{61a}', '\u{620}'..='\u{657}', '\u{659}'..='\u{65f}', '\u{66e}'..='\u{6d3}', + '\u{6d5}'..='\u{6dc}', '\u{6e1}'..='\u{6e8}', '\u{6ed}'..='\u{6ef}', '\u{6fa}'..='\u{6fc}', + '\u{6ff}'..='\u{6ff}', '\u{710}'..='\u{73f}', '\u{74d}'..='\u{7b1}', '\u{7ca}'..='\u{7ea}', + '\u{7f4}'..='\u{7f5}', '\u{7fa}'..='\u{7fa}', '\u{800}'..='\u{817}', '\u{81a}'..='\u{82c}', + '\u{840}'..='\u{858}', '\u{860}'..='\u{86a}', '\u{870}'..='\u{887}', '\u{889}'..='\u{88f}', + '\u{897}'..='\u{897}', '\u{8a0}'..='\u{8c9}', '\u{8d4}'..='\u{8df}', '\u{8e3}'..='\u{8e9}', + '\u{8f0}'..='\u{93b}', '\u{93d}'..='\u{94c}', '\u{94e}'..='\u{950}', '\u{955}'..='\u{963}', + '\u{971}'..='\u{983}', '\u{985}'..='\u{98c}', '\u{98f}'..='\u{990}', '\u{993}'..='\u{9a8}', + '\u{9aa}'..='\u{9b0}', '\u{9b2}'..='\u{9b2}', '\u{9b6}'..='\u{9b9}', '\u{9bd}'..='\u{9c4}', + '\u{9c7}'..='\u{9c8}', '\u{9cb}'..='\u{9cc}', '\u{9ce}'..='\u{9ce}', '\u{9d7}'..='\u{9d7}', + '\u{9dc}'..='\u{9dd}', '\u{9df}'..='\u{9e3}', '\u{9f0}'..='\u{9f1}', '\u{9fc}'..='\u{9fc}', + '\u{a01}'..='\u{a03}', '\u{a05}'..='\u{a0a}', '\u{a0f}'..='\u{a10}', '\u{a13}'..='\u{a28}', + '\u{a2a}'..='\u{a30}', '\u{a32}'..='\u{a33}', '\u{a35}'..='\u{a36}', '\u{a38}'..='\u{a39}', + '\u{a3e}'..='\u{a42}', '\u{a47}'..='\u{a48}', '\u{a4b}'..='\u{a4c}', '\u{a51}'..='\u{a51}', + '\u{a59}'..='\u{a5c}', '\u{a5e}'..='\u{a5e}', '\u{a70}'..='\u{a75}', '\u{a81}'..='\u{a83}', + '\u{a85}'..='\u{a8d}', '\u{a8f}'..='\u{a91}', '\u{a93}'..='\u{aa8}', '\u{aaa}'..='\u{ab0}', + '\u{ab2}'..='\u{ab3}', '\u{ab5}'..='\u{ab9}', '\u{abd}'..='\u{ac5}', '\u{ac7}'..='\u{ac9}', + '\u{acb}'..='\u{acc}', '\u{ad0}'..='\u{ad0}', '\u{ae0}'..='\u{ae3}', '\u{af9}'..='\u{afc}', + '\u{b01}'..='\u{b03}', '\u{b05}'..='\u{b0c}', '\u{b0f}'..='\u{b10}', '\u{b13}'..='\u{b28}', + '\u{b2a}'..='\u{b30}', '\u{b32}'..='\u{b33}', '\u{b35}'..='\u{b39}', '\u{b3d}'..='\u{b44}', + '\u{b47}'..='\u{b48}', '\u{b4b}'..='\u{b4c}', '\u{b56}'..='\u{b57}', '\u{b5c}'..='\u{b5d}', + '\u{b5f}'..='\u{b63}', '\u{b71}'..='\u{b71}', '\u{b82}'..='\u{b83}', '\u{b85}'..='\u{b8a}', + '\u{b8e}'..='\u{b90}', '\u{b92}'..='\u{b95}', '\u{b99}'..='\u{b9a}', '\u{b9c}'..='\u{b9c}', + '\u{b9e}'..='\u{b9f}', '\u{ba3}'..='\u{ba4}', '\u{ba8}'..='\u{baa}', '\u{bae}'..='\u{bb9}', + '\u{bbe}'..='\u{bc2}', '\u{bc6}'..='\u{bc8}', '\u{bca}'..='\u{bcc}', '\u{bd0}'..='\u{bd0}', + '\u{bd7}'..='\u{bd7}', '\u{c00}'..='\u{c0c}', '\u{c0e}'..='\u{c10}', '\u{c12}'..='\u{c28}', + '\u{c2a}'..='\u{c39}', '\u{c3d}'..='\u{c44}', '\u{c46}'..='\u{c48}', '\u{c4a}'..='\u{c4c}', + '\u{c55}'..='\u{c56}', '\u{c58}'..='\u{c5a}', '\u{c5c}'..='\u{c5d}', '\u{c60}'..='\u{c63}', + '\u{c80}'..='\u{c83}',
'\u{c85}'..='\u{c8c}', '\u{c8e}'..='\u{c90}', '\u{c92}'..='\u{ca8}', + '\u{caa}'..='\u{cb3}', '\u{cb5}'..='\u{cb9}', '\u{cbd}'..='\u{cc4}', '\u{cc6}'..='\u{cc8}', + '\u{cca}'..='\u{ccc}', '\u{cd5}'..='\u{cd6}', '\u{cdc}'..='\u{cde}', '\u{ce0}'..='\u{ce3}', + '\u{cf1}'..='\u{cf3}', '\u{d00}'..='\u{d0c}', '\u{d0e}'..='\u{d10}', '\u{d12}'..='\u{d3a}', + '\u{d3d}'..='\u{d44}', '\u{d46}'..='\u{d48}', '\u{d4a}'..='\u{d4c}', '\u{d4e}'..='\u{d4e}', + '\u{d54}'..='\u{d57}', '\u{d5f}'..='\u{d63}', '\u{d7a}'..='\u{d7f}', '\u{d81}'..='\u{d83}', + '\u{d85}'..='\u{d96}', '\u{d9a}'..='\u{db1}', '\u{db3}'..='\u{dbb}', '\u{dbd}'..='\u{dbd}', + '\u{dc0}'..='\u{dc6}', '\u{dcf}'..='\u{dd4}', '\u{dd6}'..='\u{dd6}', '\u{dd8}'..='\u{ddf}', + '\u{df2}'..='\u{df3}', '\u{e01}'..='\u{e3a}', '\u{e40}'..='\u{e46}', '\u{e4d}'..='\u{e4d}', + '\u{e81}'..='\u{e82}', '\u{e84}'..='\u{e84}', '\u{e86}'..='\u{e8a}', '\u{e8c}'..='\u{ea3}', + '\u{ea5}'..='\u{ea5}', '\u{ea7}'..='\u{eb9}', '\u{ebb}'..='\u{ebd}', '\u{ec0}'..='\u{ec4}', + '\u{ec6}'..='\u{ec6}', '\u{ecd}'..='\u{ecd}', '\u{edc}'..='\u{edf}', '\u{f00}'..='\u{f00}', + '\u{f40}'..='\u{f47}', '\u{f49}'..='\u{f6c}', '\u{f71}'..='\u{f83}', '\u{f88}'..='\u{f97}', + '\u{f99}'..='\u{fbc}', '\u{1000}'..='\u{1036}', '\u{1038}'..='\u{1038}', + '\u{103b}'..='\u{103f}', '\u{1050}'..='\u{108f}', '\u{109a}'..='\u{109d}', + '\u{10a0}'..='\u{10c5}', '\u{10c7}'..='\u{10c7}', '\u{10cd}'..='\u{10cd}', + '\u{10d0}'..='\u{10fa}', '\u{10fc}'..='\u{1248}', '\u{124a}'..='\u{124d}', + '\u{1250}'..='\u{1256}', '\u{1258}'..='\u{1258}', '\u{125a}'..='\u{125d}', + '\u{1260}'..='\u{1288}', '\u{128a}'..='\u{128d}', '\u{1290}'..='\u{12b0}', + '\u{12b2}'..='\u{12b5}', '\u{12b8}'..='\u{12be}', '\u{12c0}'..='\u{12c0}', + '\u{12c2}'..='\u{12c5}', '\u{12c8}'..='\u{12d6}', '\u{12d8}'..='\u{1310}', + '\u{1312}'..='\u{1315}', '\u{1318}'..='\u{135a}', '\u{1380}'..='\u{138f}', + '\u{13a0}'..='\u{13f5}', '\u{13f8}'..='\u{13fd}', '\u{1401}'..='\u{166c}', + '\u{166f}'..='\u{167f}', '\u{1681}'..='\u{169a}', '\u{16a0}'..='\u{16ea}', + '\u{16ee}'..='\u{16f8}', '\u{1700}'..='\u{1713}', '\u{171f}'..='\u{1733}', + '\u{1740}'..='\u{1753}', '\u{1760}'..='\u{176c}', '\u{176e}'..='\u{1770}', + '\u{1772}'..='\u{1773}', '\u{1780}'..='\u{17b3}', '\u{17b6}'..='\u{17c8}', + '\u{17d7}'..='\u{17d7}', '\u{17dc}'..='\u{17dc}', '\u{1820}'..='\u{1878}', + '\u{1880}'..='\u{18aa}', '\u{18b0}'..='\u{18f5}', '\u{1900}'..='\u{191e}', + '\u{1920}'..='\u{192b}', '\u{1930}'..='\u{1938}', '\u{1950}'..='\u{196d}', + '\u{1970}'..='\u{1974}', '\u{1980}'..='\u{19ab}', '\u{19b0}'..='\u{19c9}', + '\u{1a00}'..='\u{1a1b}', '\u{1a20}'..='\u{1a5e}', '\u{1a61}'..='\u{1a74}', + '\u{1aa7}'..='\u{1aa7}', '\u{1abf}'..='\u{1ac0}', '\u{1acc}'..='\u{1ace}', + '\u{1b00}'..='\u{1b33}', '\u{1b35}'..='\u{1b43}', '\u{1b45}'..='\u{1b4c}', + '\u{1b80}'..='\u{1ba9}', '\u{1bac}'..='\u{1baf}', '\u{1bba}'..='\u{1be5}', + '\u{1be7}'..='\u{1bf1}', '\u{1c00}'..='\u{1c36}', '\u{1c4d}'..='\u{1c4f}', + '\u{1c5a}'..='\u{1c7d}', '\u{1c80}'..='\u{1c8a}', '\u{1c90}'..='\u{1cba}', + '\u{1cbd}'..='\u{1cbf}', '\u{1ce9}'..='\u{1cec}', '\u{1cee}'..='\u{1cf3}', + '\u{1cf5}'..='\u{1cf6}', '\u{1cfa}'..='\u{1cfa}', '\u{1d00}'..='\u{1dbf}', + '\u{1dd3}'..='\u{1df4}', '\u{1e00}'..='\u{1f15}', '\u{1f18}'..='\u{1f1d}', + '\u{1f20}'..='\u{1f45}', '\u{1f48}'..='\u{1f4d}', '\u{1f50}'..='\u{1f57}', + '\u{1f59}'..='\u{1f59}', '\u{1f5b}'..='\u{1f5b}', '\u{1f5d}'..='\u{1f5d}', + '\u{1f5f}'..='\u{1f7d}', '\u{1f80}'..='\u{1fb4}', '\u{1fb6}'..='\u{1fbc}', + '\u{1fbe}'..='\u{1fbe}', 
'\u{1fc2}'..='\u{1fc4}', '\u{1fc6}'..='\u{1fcc}', + '\u{1fd0}'..='\u{1fd3}', '\u{1fd6}'..='\u{1fdb}', '\u{1fe0}'..='\u{1fec}', + '\u{1ff2}'..='\u{1ff4}', '\u{1ff6}'..='\u{1ffc}', '\u{2071}'..='\u{2071}', + '\u{207f}'..='\u{207f}', '\u{2090}'..='\u{209c}', '\u{2102}'..='\u{2102}', + '\u{2107}'..='\u{2107}', '\u{210a}'..='\u{2113}', '\u{2115}'..='\u{2115}', + '\u{2119}'..='\u{211d}', '\u{2124}'..='\u{2124}', '\u{2126}'..='\u{2126}', + '\u{2128}'..='\u{2128}', '\u{212a}'..='\u{212d}', '\u{212f}'..='\u{2139}', + '\u{213c}'..='\u{213f}', '\u{2145}'..='\u{2149}', '\u{214e}'..='\u{214e}', + '\u{2160}'..='\u{2188}', '\u{24b6}'..='\u{24e9}', '\u{2c00}'..='\u{2ce4}', + '\u{2ceb}'..='\u{2cee}', '\u{2cf2}'..='\u{2cf3}', '\u{2d00}'..='\u{2d25}', + '\u{2d27}'..='\u{2d27}', '\u{2d2d}'..='\u{2d2d}', '\u{2d30}'..='\u{2d67}', + '\u{2d6f}'..='\u{2d6f}', '\u{2d80}'..='\u{2d96}', '\u{2da0}'..='\u{2da6}', + '\u{2da8}'..='\u{2dae}', '\u{2db0}'..='\u{2db6}', '\u{2db8}'..='\u{2dbe}', + '\u{2dc0}'..='\u{2dc6}', '\u{2dc8}'..='\u{2dce}', '\u{2dd0}'..='\u{2dd6}', + '\u{2dd8}'..='\u{2dde}', '\u{2de0}'..='\u{2dff}', '\u{2e2f}'..='\u{2e2f}', + '\u{3005}'..='\u{3007}', '\u{3021}'..='\u{3029}', '\u{3031}'..='\u{3035}', + '\u{3038}'..='\u{303c}', '\u{3041}'..='\u{3096}', '\u{309d}'..='\u{309f}', + '\u{30a1}'..='\u{30fa}', '\u{30fc}'..='\u{30ff}', '\u{3105}'..='\u{312f}', + '\u{3131}'..='\u{318e}', '\u{31a0}'..='\u{31bf}', '\u{31f0}'..='\u{31ff}', + '\u{3400}'..='\u{4dbf}', '\u{4e00}'..='\u{a48c}', '\u{a4d0}'..='\u{a4fd}', + '\u{a500}'..='\u{a60c}', '\u{a610}'..='\u{a61f}', '\u{a62a}'..='\u{a62b}', + '\u{a640}'..='\u{a66e}', '\u{a674}'..='\u{a67b}', '\u{a67f}'..='\u{a6ef}', + '\u{a717}'..='\u{a71f}', '\u{a722}'..='\u{a788}', '\u{a78b}'..='\u{a7dc}', + '\u{a7f1}'..='\u{a805}', '\u{a807}'..='\u{a827}', '\u{a840}'..='\u{a873}', + '\u{a880}'..='\u{a8c3}', '\u{a8c5}'..='\u{a8c5}', '\u{a8f2}'..='\u{a8f7}', + '\u{a8fb}'..='\u{a8fb}', '\u{a8fd}'..='\u{a8ff}', '\u{a90a}'..='\u{a92a}', + '\u{a930}'..='\u{a952}', '\u{a960}'..='\u{a97c}', '\u{a980}'..='\u{a9b2}', + '\u{a9b4}'..='\u{a9bf}', '\u{a9cf}'..='\u{a9cf}', '\u{a9e0}'..='\u{a9ef}', + '\u{a9fa}'..='\u{a9fe}', '\u{aa00}'..='\u{aa36}', '\u{aa40}'..='\u{aa4d}', + '\u{aa60}'..='\u{aa76}', '\u{aa7a}'..='\u{aabe}', '\u{aac0}'..='\u{aac0}', + '\u{aac2}'..='\u{aac2}', '\u{aadb}'..='\u{aadd}', '\u{aae0}'..='\u{aaef}', + '\u{aaf2}'..='\u{aaf5}', '\u{ab01}'..='\u{ab06}', '\u{ab09}'..='\u{ab0e}', + '\u{ab11}'..='\u{ab16}', '\u{ab20}'..='\u{ab26}', '\u{ab28}'..='\u{ab2e}', + '\u{ab30}'..='\u{ab5a}', '\u{ab5c}'..='\u{ab69}', '\u{ab70}'..='\u{abea}', + '\u{ac00}'..='\u{d7a3}', '\u{d7b0}'..='\u{d7c6}', '\u{d7cb}'..='\u{d7fb}', + '\u{f900}'..='\u{fa6d}', '\u{fa70}'..='\u{fad9}', '\u{fb00}'..='\u{fb06}', + '\u{fb13}'..='\u{fb17}', '\u{fb1d}'..='\u{fb28}', '\u{fb2a}'..='\u{fb36}', + '\u{fb38}'..='\u{fb3c}', '\u{fb3e}'..='\u{fb3e}', '\u{fb40}'..='\u{fb41}', + '\u{fb43}'..='\u{fb44}', '\u{fb46}'..='\u{fbb1}', '\u{fbd3}'..='\u{fd3d}', + '\u{fd50}'..='\u{fd8f}', '\u{fd92}'..='\u{fdc7}', '\u{fdf0}'..='\u{fdfb}', + '\u{fe70}'..='\u{fe74}', '\u{fe76}'..='\u{fefc}', '\u{ff21}'..='\u{ff3a}', + '\u{ff41}'..='\u{ff5a}', '\u{ff66}'..='\u{ffbe}', '\u{ffc2}'..='\u{ffc7}', + '\u{ffca}'..='\u{ffcf}', '\u{ffd2}'..='\u{ffd7}', '\u{ffda}'..='\u{ffdc}', + '\u{10000}'..='\u{1000b}', '\u{1000d}'..='\u{10026}', '\u{10028}'..='\u{1003a}', + '\u{1003c}'..='\u{1003d}', '\u{1003f}'..='\u{1004d}', '\u{10050}'..='\u{1005d}', + '\u{10080}'..='\u{100fa}', '\u{10140}'..='\u{10174}', '\u{10280}'..='\u{1029c}', + 
'\u{102a0}'..='\u{102d0}', '\u{10300}'..='\u{1031f}', '\u{1032d}'..='\u{1034a}', + '\u{10350}'..='\u{1037a}', '\u{10380}'..='\u{1039d}', '\u{103a0}'..='\u{103c3}', + '\u{103c8}'..='\u{103cf}', '\u{103d1}'..='\u{103d5}', '\u{10400}'..='\u{1049d}', + '\u{104b0}'..='\u{104d3}', '\u{104d8}'..='\u{104fb}', '\u{10500}'..='\u{10527}', + '\u{10530}'..='\u{10563}', '\u{10570}'..='\u{1057a}', '\u{1057c}'..='\u{1058a}', + '\u{1058c}'..='\u{10592}', '\u{10594}'..='\u{10595}', '\u{10597}'..='\u{105a1}', + '\u{105a3}'..='\u{105b1}', '\u{105b3}'..='\u{105b9}', '\u{105bb}'..='\u{105bc}', + '\u{105c0}'..='\u{105f3}', '\u{10600}'..='\u{10736}', '\u{10740}'..='\u{10755}', + '\u{10760}'..='\u{10767}', '\u{10780}'..='\u{10785}', '\u{10787}'..='\u{107b0}', + '\u{107b2}'..='\u{107ba}', '\u{10800}'..='\u{10805}', '\u{10808}'..='\u{10808}', + '\u{1080a}'..='\u{10835}', '\u{10837}'..='\u{10838}', '\u{1083c}'..='\u{1083c}', + '\u{1083f}'..='\u{10855}', '\u{10860}'..='\u{10876}', '\u{10880}'..='\u{1089e}', + '\u{108e0}'..='\u{108f2}', '\u{108f4}'..='\u{108f5}', '\u{10900}'..='\u{10915}', + '\u{10920}'..='\u{10939}', '\u{10940}'..='\u{10959}', '\u{10980}'..='\u{109b7}', + '\u{109be}'..='\u{109bf}', '\u{10a00}'..='\u{10a03}', '\u{10a05}'..='\u{10a06}', + '\u{10a0c}'..='\u{10a13}', '\u{10a15}'..='\u{10a17}', '\u{10a19}'..='\u{10a35}', + '\u{10a60}'..='\u{10a7c}', '\u{10a80}'..='\u{10a9c}', '\u{10ac0}'..='\u{10ac7}', + '\u{10ac9}'..='\u{10ae4}', '\u{10b00}'..='\u{10b35}', '\u{10b40}'..='\u{10b55}', + '\u{10b60}'..='\u{10b72}', '\u{10b80}'..='\u{10b91}', '\u{10c00}'..='\u{10c48}', + '\u{10c80}'..='\u{10cb2}', '\u{10cc0}'..='\u{10cf2}', '\u{10d00}'..='\u{10d27}', + '\u{10d4a}'..='\u{10d65}', '\u{10d69}'..='\u{10d69}', '\u{10d6f}'..='\u{10d85}', + '\u{10e80}'..='\u{10ea9}', '\u{10eab}'..='\u{10eac}', '\u{10eb0}'..='\u{10eb1}', + '\u{10ec2}'..='\u{10ec7}', '\u{10efa}'..='\u{10efc}', '\u{10f00}'..='\u{10f1c}', + '\u{10f27}'..='\u{10f27}', '\u{10f30}'..='\u{10f45}', '\u{10f70}'..='\u{10f81}', + '\u{10fb0}'..='\u{10fc4}', '\u{10fe0}'..='\u{10ff6}', '\u{11000}'..='\u{11045}', + '\u{11071}'..='\u{11075}', '\u{11080}'..='\u{110b8}', '\u{110c2}'..='\u{110c2}', + '\u{110d0}'..='\u{110e8}', '\u{11100}'..='\u{11132}', '\u{11144}'..='\u{11147}', + '\u{11150}'..='\u{11172}', '\u{11176}'..='\u{11176}', '\u{11180}'..='\u{111bf}', + '\u{111c1}'..='\u{111c4}', '\u{111ce}'..='\u{111cf}', '\u{111da}'..='\u{111da}', + '\u{111dc}'..='\u{111dc}', '\u{11200}'..='\u{11211}', '\u{11213}'..='\u{11234}', + '\u{11237}'..='\u{11237}', '\u{1123e}'..='\u{11241}', '\u{11280}'..='\u{11286}', + '\u{11288}'..='\u{11288}', '\u{1128a}'..='\u{1128d}', '\u{1128f}'..='\u{1129d}', + '\u{1129f}'..='\u{112a8}', '\u{112b0}'..='\u{112e8}', '\u{11300}'..='\u{11303}', + '\u{11305}'..='\u{1130c}', '\u{1130f}'..='\u{11310}', '\u{11313}'..='\u{11328}', + '\u{1132a}'..='\u{11330}', '\u{11332}'..='\u{11333}', '\u{11335}'..='\u{11339}', + '\u{1133d}'..='\u{11344}', '\u{11347}'..='\u{11348}', '\u{1134b}'..='\u{1134c}', + '\u{11350}'..='\u{11350}', '\u{11357}'..='\u{11357}', '\u{1135d}'..='\u{11363}', + '\u{11380}'..='\u{11389}', '\u{1138b}'..='\u{1138b}', '\u{1138e}'..='\u{1138e}', + '\u{11390}'..='\u{113b5}', '\u{113b7}'..='\u{113c0}', '\u{113c2}'..='\u{113c2}', + '\u{113c5}'..='\u{113c5}', '\u{113c7}'..='\u{113ca}', '\u{113cc}'..='\u{113cd}', + '\u{113d1}'..='\u{113d1}', '\u{113d3}'..='\u{113d3}', '\u{11400}'..='\u{11441}', + '\u{11443}'..='\u{11445}', '\u{11447}'..='\u{1144a}', '\u{1145f}'..='\u{11461}', + '\u{11480}'..='\u{114c1}', '\u{114c4}'..='\u{114c5}', 
'\u{114c7}'..='\u{114c7}', + '\u{11580}'..='\u{115b5}', '\u{115b8}'..='\u{115be}', '\u{115d8}'..='\u{115dd}', + '\u{11600}'..='\u{1163e}', '\u{11640}'..='\u{11640}', '\u{11644}'..='\u{11644}', + '\u{11680}'..='\u{116b5}', '\u{116b8}'..='\u{116b8}', '\u{11700}'..='\u{1171a}', + '\u{1171d}'..='\u{1172a}', '\u{11740}'..='\u{11746}', '\u{11800}'..='\u{11838}', + '\u{118a0}'..='\u{118df}', '\u{118ff}'..='\u{11906}', '\u{11909}'..='\u{11909}', + '\u{1190c}'..='\u{11913}', '\u{11915}'..='\u{11916}', '\u{11918}'..='\u{11935}', + '\u{11937}'..='\u{11938}', '\u{1193b}'..='\u{1193c}', '\u{1193f}'..='\u{11942}', + '\u{119a0}'..='\u{119a7}', '\u{119aa}'..='\u{119d7}', '\u{119da}'..='\u{119df}', + '\u{119e1}'..='\u{119e1}', '\u{119e3}'..='\u{119e4}', '\u{11a00}'..='\u{11a32}', + '\u{11a35}'..='\u{11a3e}', '\u{11a50}'..='\u{11a97}', '\u{11a9d}'..='\u{11a9d}', + '\u{11ab0}'..='\u{11af8}', '\u{11b60}'..='\u{11b67}', '\u{11bc0}'..='\u{11be0}', + '\u{11c00}'..='\u{11c08}', '\u{11c0a}'..='\u{11c36}', '\u{11c38}'..='\u{11c3e}', + '\u{11c40}'..='\u{11c40}', '\u{11c72}'..='\u{11c8f}', '\u{11c92}'..='\u{11ca7}', + '\u{11ca9}'..='\u{11cb6}', '\u{11d00}'..='\u{11d06}', '\u{11d08}'..='\u{11d09}', + '\u{11d0b}'..='\u{11d36}', '\u{11d3a}'..='\u{11d3a}', '\u{11d3c}'..='\u{11d3d}', + '\u{11d3f}'..='\u{11d41}', '\u{11d43}'..='\u{11d43}', '\u{11d46}'..='\u{11d47}', + '\u{11d60}'..='\u{11d65}', '\u{11d67}'..='\u{11d68}', '\u{11d6a}'..='\u{11d8e}', + '\u{11d90}'..='\u{11d91}', '\u{11d93}'..='\u{11d96}', '\u{11d98}'..='\u{11d98}', + '\u{11db0}'..='\u{11ddb}', '\u{11ee0}'..='\u{11ef6}', '\u{11f00}'..='\u{11f10}', + '\u{11f12}'..='\u{11f3a}', '\u{11f3e}'..='\u{11f40}', '\u{11fb0}'..='\u{11fb0}', + '\u{12000}'..='\u{12399}', '\u{12400}'..='\u{1246e}', '\u{12480}'..='\u{12543}', + '\u{12f90}'..='\u{12ff0}', '\u{13000}'..='\u{1342f}', '\u{13441}'..='\u{13446}', + '\u{13460}'..='\u{143fa}', '\u{14400}'..='\u{14646}', '\u{16100}'..='\u{1612e}', + '\u{16800}'..='\u{16a38}', '\u{16a40}'..='\u{16a5e}', '\u{16a70}'..='\u{16abe}', + '\u{16ad0}'..='\u{16aed}', '\u{16b00}'..='\u{16b2f}', '\u{16b40}'..='\u{16b43}', + '\u{16b63}'..='\u{16b77}', '\u{16b7d}'..='\u{16b8f}', '\u{16d40}'..='\u{16d6c}', + '\u{16e40}'..='\u{16e7f}', '\u{16ea0}'..='\u{16eb8}', '\u{16ebb}'..='\u{16ed3}', + '\u{16f00}'..='\u{16f4a}', '\u{16f4f}'..='\u{16f87}', '\u{16f8f}'..='\u{16f9f}', + '\u{16fe0}'..='\u{16fe1}', '\u{16fe3}'..='\u{16fe3}', '\u{16ff0}'..='\u{16ff6}', + '\u{17000}'..='\u{18cd5}', '\u{18cff}'..='\u{18d1e}', '\u{18d80}'..='\u{18df2}', + '\u{1aff0}'..='\u{1aff3}', '\u{1aff5}'..='\u{1affb}', '\u{1affd}'..='\u{1affe}', + '\u{1b000}'..='\u{1b122}', '\u{1b132}'..='\u{1b132}', '\u{1b150}'..='\u{1b152}', + '\u{1b155}'..='\u{1b155}', '\u{1b164}'..='\u{1b167}', '\u{1b170}'..='\u{1b2fb}', + '\u{1bc00}'..='\u{1bc6a}', '\u{1bc70}'..='\u{1bc7c}', '\u{1bc80}'..='\u{1bc88}', + '\u{1bc90}'..='\u{1bc99}', '\u{1bc9e}'..='\u{1bc9e}', '\u{1d400}'..='\u{1d454}', + '\u{1d456}'..='\u{1d49c}', '\u{1d49e}'..='\u{1d49f}', '\u{1d4a2}'..='\u{1d4a2}', + '\u{1d4a5}'..='\u{1d4a6}', '\u{1d4a9}'..='\u{1d4ac}', '\u{1d4ae}'..='\u{1d4b9}', + '\u{1d4bb}'..='\u{1d4bb}', '\u{1d4bd}'..='\u{1d4c3}', '\u{1d4c5}'..='\u{1d505}', + '\u{1d507}'..='\u{1d50a}', '\u{1d50d}'..='\u{1d514}', '\u{1d516}'..='\u{1d51c}', + '\u{1d51e}'..='\u{1d539}', '\u{1d53b}'..='\u{1d53e}', '\u{1d540}'..='\u{1d544}', + '\u{1d546}'..='\u{1d546}', '\u{1d54a}'..='\u{1d550}', '\u{1d552}'..='\u{1d6a5}', + '\u{1d6a8}'..='\u{1d6c0}', '\u{1d6c2}'..='\u{1d6da}', '\u{1d6dc}'..='\u{1d6fa}', + '\u{1d6fc}'..='\u{1d714}', 
'\u{1d716}'..='\u{1d734}', '\u{1d736}'..='\u{1d74e}',
+ '\u{1d750}'..='\u{1d76e}', '\u{1d770}'..='\u{1d788}', '\u{1d78a}'..='\u{1d7a8}',
+ '\u{1d7aa}'..='\u{1d7c2}', '\u{1d7c4}'..='\u{1d7cb}', '\u{1df00}'..='\u{1df1e}',
+ '\u{1df25}'..='\u{1df2a}', '\u{1e000}'..='\u{1e006}', '\u{1e008}'..='\u{1e018}',
+ '\u{1e01b}'..='\u{1e021}', '\u{1e023}'..='\u{1e024}', '\u{1e026}'..='\u{1e02a}',
+ '\u{1e030}'..='\u{1e06d}', '\u{1e08f}'..='\u{1e08f}', '\u{1e100}'..='\u{1e12c}',
+ '\u{1e137}'..='\u{1e13d}', '\u{1e14e}'..='\u{1e14e}', '\u{1e290}'..='\u{1e2ad}',
+ '\u{1e2c0}'..='\u{1e2eb}', '\u{1e4d0}'..='\u{1e4eb}', '\u{1e5d0}'..='\u{1e5ed}',
+ '\u{1e5f0}'..='\u{1e5f0}', '\u{1e6c0}'..='\u{1e6de}', '\u{1e6e0}'..='\u{1e6f5}',
+ '\u{1e6fe}'..='\u{1e6ff}', '\u{1e7e0}'..='\u{1e7e6}', '\u{1e7e8}'..='\u{1e7eb}',
+ '\u{1e7ed}'..='\u{1e7ee}', '\u{1e7f0}'..='\u{1e7fe}', '\u{1e800}'..='\u{1e8c4}',
+ '\u{1e900}'..='\u{1e943}', '\u{1e947}'..='\u{1e947}', '\u{1e94b}'..='\u{1e94b}',
+ '\u{1ee00}'..='\u{1ee03}', '\u{1ee05}'..='\u{1ee1f}', '\u{1ee21}'..='\u{1ee22}',
+ '\u{1ee24}'..='\u{1ee24}', '\u{1ee27}'..='\u{1ee27}', '\u{1ee29}'..='\u{1ee32}',
+ '\u{1ee34}'..='\u{1ee37}', '\u{1ee39}'..='\u{1ee39}', '\u{1ee3b}'..='\u{1ee3b}',
+ '\u{1ee42}'..='\u{1ee42}', '\u{1ee47}'..='\u{1ee47}', '\u{1ee49}'..='\u{1ee49}',
+ '\u{1ee4b}'..='\u{1ee4b}', '\u{1ee4d}'..='\u{1ee4f}', '\u{1ee51}'..='\u{1ee52}',
+ '\u{1ee54}'..='\u{1ee54}', '\u{1ee57}'..='\u{1ee57}', '\u{1ee59}'..='\u{1ee59}',
+ '\u{1ee5b}'..='\u{1ee5b}', '\u{1ee5d}'..='\u{1ee5d}', '\u{1ee5f}'..='\u{1ee5f}',
+ '\u{1ee61}'..='\u{1ee62}', '\u{1ee64}'..='\u{1ee64}', '\u{1ee67}'..='\u{1ee6a}',
+ '\u{1ee6c}'..='\u{1ee72}', '\u{1ee74}'..='\u{1ee77}', '\u{1ee79}'..='\u{1ee7c}',
+ '\u{1ee7e}'..='\u{1ee7e}', '\u{1ee80}'..='\u{1ee89}', '\u{1ee8b}'..='\u{1ee9b}',
+ '\u{1eea1}'..='\u{1eea3}', '\u{1eea5}'..='\u{1eea9}', '\u{1eeab}'..='\u{1eebb}',
+ '\u{1f130}'..='\u{1f149}', '\u{1f150}'..='\u{1f169}', '\u{1f170}'..='\u{1f189}',
+ '\u{20000}'..='\u{2a6df}', '\u{2a700}'..='\u{2b81d}', '\u{2b820}'..='\u{2cead}',
+ '\u{2ceb0}'..='\u{2ebe0}', '\u{2ebf0}'..='\u{2ee5d}', '\u{2f800}'..='\u{2fa1d}',
+ '\u{30000}'..='\u{3134a}', '\u{31350}'..='\u{33479}',
+];
+
+#[rustfmt::skip]
+pub(super) static CASE_IGNORABLE: &[RangeInclusive<char>; 459] = &[
+ '\u{a8}'..='\u{a8}', '\u{ad}'..='\u{ad}', '\u{af}'..='\u{af}', '\u{b4}'..='\u{b4}',
+ '\u{b7}'..='\u{b8}', '\u{2b0}'..='\u{36f}', '\u{374}'..='\u{375}', '\u{37a}'..='\u{37a}',
+ '\u{384}'..='\u{385}', '\u{387}'..='\u{387}', '\u{483}'..='\u{489}', '\u{559}'..='\u{559}',
+ '\u{55f}'..='\u{55f}', '\u{591}'..='\u{5bd}', '\u{5bf}'..='\u{5bf}', '\u{5c1}'..='\u{5c2}',
+ '\u{5c4}'..='\u{5c5}', '\u{5c7}'..='\u{5c7}', '\u{5f4}'..='\u{5f4}', '\u{600}'..='\u{605}',
+ '\u{610}'..='\u{61a}', '\u{61c}'..='\u{61c}', '\u{640}'..='\u{640}', '\u{64b}'..='\u{65f}',
+ '\u{670}'..='\u{670}', '\u{6d6}'..='\u{6dd}', '\u{6df}'..='\u{6e8}', '\u{6ea}'..='\u{6ed}',
+ '\u{70f}'..='\u{70f}', '\u{711}'..='\u{711}', '\u{730}'..='\u{74a}', '\u{7a6}'..='\u{7b0}',
+ '\u{7eb}'..='\u{7f5}', '\u{7fa}'..='\u{7fa}', '\u{7fd}'..='\u{7fd}', '\u{816}'..='\u{82d}',
+ '\u{859}'..='\u{85b}', '\u{888}'..='\u{888}', '\u{890}'..='\u{891}', '\u{897}'..='\u{89f}',
+ '\u{8c9}'..='\u{902}', '\u{93a}'..='\u{93a}', '\u{93c}'..='\u{93c}', '\u{941}'..='\u{948}',
+ '\u{94d}'..='\u{94d}', '\u{951}'..='\u{957}', '\u{962}'..='\u{963}', '\u{971}'..='\u{971}',
+ '\u{981}'..='\u{981}', '\u{9bc}'..='\u{9bc}', '\u{9c1}'..='\u{9c4}', '\u{9cd}'..='\u{9cd}',
+ '\u{9e2}'..='\u{9e3}', '\u{9fe}'..='\u{9fe}', '\u{a01}'..='\u{a02}',
'\u{a3c}'..='\u{a3c}', + '\u{a41}'..='\u{a42}', '\u{a47}'..='\u{a48}', '\u{a4b}'..='\u{a4d}', '\u{a51}'..='\u{a51}', + '\u{a70}'..='\u{a71}', '\u{a75}'..='\u{a75}', '\u{a81}'..='\u{a82}', '\u{abc}'..='\u{abc}', + '\u{ac1}'..='\u{ac5}', '\u{ac7}'..='\u{ac8}', '\u{acd}'..='\u{acd}', '\u{ae2}'..='\u{ae3}', + '\u{afa}'..='\u{aff}', '\u{b01}'..='\u{b01}', '\u{b3c}'..='\u{b3c}', '\u{b3f}'..='\u{b3f}', + '\u{b41}'..='\u{b44}', '\u{b4d}'..='\u{b4d}', '\u{b55}'..='\u{b56}', '\u{b62}'..='\u{b63}', + '\u{b82}'..='\u{b82}', '\u{bc0}'..='\u{bc0}', '\u{bcd}'..='\u{bcd}', '\u{c00}'..='\u{c00}', + '\u{c04}'..='\u{c04}', '\u{c3c}'..='\u{c3c}', '\u{c3e}'..='\u{c40}', '\u{c46}'..='\u{c48}', + '\u{c4a}'..='\u{c4d}', '\u{c55}'..='\u{c56}', '\u{c62}'..='\u{c63}', '\u{c81}'..='\u{c81}', + '\u{cbc}'..='\u{cbc}', '\u{cbf}'..='\u{cbf}', '\u{cc6}'..='\u{cc6}', '\u{ccc}'..='\u{ccd}', + '\u{ce2}'..='\u{ce3}', '\u{d00}'..='\u{d01}', '\u{d3b}'..='\u{d3c}', '\u{d41}'..='\u{d44}', + '\u{d4d}'..='\u{d4d}', '\u{d62}'..='\u{d63}', '\u{d81}'..='\u{d81}', '\u{dca}'..='\u{dca}', + '\u{dd2}'..='\u{dd4}', '\u{dd6}'..='\u{dd6}', '\u{e31}'..='\u{e31}', '\u{e34}'..='\u{e3a}', + '\u{e46}'..='\u{e4e}', '\u{eb1}'..='\u{eb1}', '\u{eb4}'..='\u{ebc}', '\u{ec6}'..='\u{ec6}', + '\u{ec8}'..='\u{ece}', '\u{f18}'..='\u{f19}', '\u{f35}'..='\u{f35}', '\u{f37}'..='\u{f37}', + '\u{f39}'..='\u{f39}', '\u{f71}'..='\u{f7e}', '\u{f80}'..='\u{f84}', '\u{f86}'..='\u{f87}', + '\u{f8d}'..='\u{f97}', '\u{f99}'..='\u{fbc}', '\u{fc6}'..='\u{fc6}', + '\u{102d}'..='\u{1030}', '\u{1032}'..='\u{1037}', '\u{1039}'..='\u{103a}', + '\u{103d}'..='\u{103e}', '\u{1058}'..='\u{1059}', '\u{105e}'..='\u{1060}', + '\u{1071}'..='\u{1074}', '\u{1082}'..='\u{1082}', '\u{1085}'..='\u{1086}', + '\u{108d}'..='\u{108d}', '\u{109d}'..='\u{109d}', '\u{10fc}'..='\u{10fc}', + '\u{135d}'..='\u{135f}', '\u{1712}'..='\u{1714}', '\u{1732}'..='\u{1733}', + '\u{1752}'..='\u{1753}', '\u{1772}'..='\u{1773}', '\u{17b4}'..='\u{17b5}', + '\u{17b7}'..='\u{17bd}', '\u{17c6}'..='\u{17c6}', '\u{17c9}'..='\u{17d3}', + '\u{17d7}'..='\u{17d7}', '\u{17dd}'..='\u{17dd}', '\u{180b}'..='\u{180f}', + '\u{1843}'..='\u{1843}', '\u{1885}'..='\u{1886}', '\u{18a9}'..='\u{18a9}', + '\u{1920}'..='\u{1922}', '\u{1927}'..='\u{1928}', '\u{1932}'..='\u{1932}', + '\u{1939}'..='\u{193b}', '\u{1a17}'..='\u{1a18}', '\u{1a1b}'..='\u{1a1b}', + '\u{1a56}'..='\u{1a56}', '\u{1a58}'..='\u{1a5e}', '\u{1a60}'..='\u{1a60}', + '\u{1a62}'..='\u{1a62}', '\u{1a65}'..='\u{1a6c}', '\u{1a73}'..='\u{1a7c}', + '\u{1a7f}'..='\u{1a7f}', '\u{1aa7}'..='\u{1aa7}', '\u{1ab0}'..='\u{1add}', + '\u{1ae0}'..='\u{1aeb}', '\u{1b00}'..='\u{1b03}', '\u{1b34}'..='\u{1b34}', + '\u{1b36}'..='\u{1b3a}', '\u{1b3c}'..='\u{1b3c}', '\u{1b42}'..='\u{1b42}', + '\u{1b6b}'..='\u{1b73}', '\u{1b80}'..='\u{1b81}', '\u{1ba2}'..='\u{1ba5}', + '\u{1ba8}'..='\u{1ba9}', '\u{1bab}'..='\u{1bad}', '\u{1be6}'..='\u{1be6}', + '\u{1be8}'..='\u{1be9}', '\u{1bed}'..='\u{1bed}', '\u{1bef}'..='\u{1bf1}', + '\u{1c2c}'..='\u{1c33}', '\u{1c36}'..='\u{1c37}', '\u{1c78}'..='\u{1c7d}', + '\u{1cd0}'..='\u{1cd2}', '\u{1cd4}'..='\u{1ce0}', '\u{1ce2}'..='\u{1ce8}', + '\u{1ced}'..='\u{1ced}', '\u{1cf4}'..='\u{1cf4}', '\u{1cf8}'..='\u{1cf9}', + '\u{1d2c}'..='\u{1d6a}', '\u{1d78}'..='\u{1d78}', '\u{1d9b}'..='\u{1dff}', + '\u{1fbd}'..='\u{1fbd}', '\u{1fbf}'..='\u{1fc1}', '\u{1fcd}'..='\u{1fcf}', + '\u{1fdd}'..='\u{1fdf}', '\u{1fed}'..='\u{1fef}', '\u{1ffd}'..='\u{1ffe}', + '\u{200b}'..='\u{200f}', '\u{2018}'..='\u{2019}', '\u{2024}'..='\u{2024}', + '\u{2027}'..='\u{2027}', 
'\u{202a}'..='\u{202e}', '\u{2060}'..='\u{2064}', + '\u{2066}'..='\u{206f}', '\u{2071}'..='\u{2071}', '\u{207f}'..='\u{207f}', + '\u{2090}'..='\u{209c}', '\u{20d0}'..='\u{20f0}', '\u{2c7c}'..='\u{2c7d}', + '\u{2cef}'..='\u{2cf1}', '\u{2d6f}'..='\u{2d6f}', '\u{2d7f}'..='\u{2d7f}', + '\u{2de0}'..='\u{2dff}', '\u{2e2f}'..='\u{2e2f}', '\u{3005}'..='\u{3005}', + '\u{302a}'..='\u{302d}', '\u{3031}'..='\u{3035}', '\u{303b}'..='\u{303b}', + '\u{3099}'..='\u{309e}', '\u{30fc}'..='\u{30fe}', '\u{a015}'..='\u{a015}', + '\u{a4f8}'..='\u{a4fd}', '\u{a60c}'..='\u{a60c}', '\u{a66f}'..='\u{a672}', + '\u{a674}'..='\u{a67d}', '\u{a67f}'..='\u{a67f}', '\u{a69c}'..='\u{a69f}', + '\u{a6f0}'..='\u{a6f1}', '\u{a700}'..='\u{a721}', '\u{a770}'..='\u{a770}', + '\u{a788}'..='\u{a78a}', '\u{a7f1}'..='\u{a7f4}', '\u{a7f8}'..='\u{a7f9}', + '\u{a802}'..='\u{a802}', '\u{a806}'..='\u{a806}', '\u{a80b}'..='\u{a80b}', + '\u{a825}'..='\u{a826}', '\u{a82c}'..='\u{a82c}', '\u{a8c4}'..='\u{a8c5}', + '\u{a8e0}'..='\u{a8f1}', '\u{a8ff}'..='\u{a8ff}', '\u{a926}'..='\u{a92d}', + '\u{a947}'..='\u{a951}', '\u{a980}'..='\u{a982}', '\u{a9b3}'..='\u{a9b3}', + '\u{a9b6}'..='\u{a9b9}', '\u{a9bc}'..='\u{a9bd}', '\u{a9cf}'..='\u{a9cf}', + '\u{a9e5}'..='\u{a9e6}', '\u{aa29}'..='\u{aa2e}', '\u{aa31}'..='\u{aa32}', + '\u{aa35}'..='\u{aa36}', '\u{aa43}'..='\u{aa43}', '\u{aa4c}'..='\u{aa4c}', + '\u{aa70}'..='\u{aa70}', '\u{aa7c}'..='\u{aa7c}', '\u{aab0}'..='\u{aab0}', + '\u{aab2}'..='\u{aab4}', '\u{aab7}'..='\u{aab8}', '\u{aabe}'..='\u{aabf}', + '\u{aac1}'..='\u{aac1}', '\u{aadd}'..='\u{aadd}', '\u{aaec}'..='\u{aaed}', + '\u{aaf3}'..='\u{aaf4}', '\u{aaf6}'..='\u{aaf6}', '\u{ab5b}'..='\u{ab5f}', + '\u{ab69}'..='\u{ab6b}', '\u{abe5}'..='\u{abe5}', '\u{abe8}'..='\u{abe8}', + '\u{abed}'..='\u{abed}', '\u{fb1e}'..='\u{fb1e}', '\u{fbb2}'..='\u{fbc2}', + '\u{fe00}'..='\u{fe0f}', '\u{fe13}'..='\u{fe13}', '\u{fe20}'..='\u{fe2f}', + '\u{fe52}'..='\u{fe52}', '\u{fe55}'..='\u{fe55}', '\u{feff}'..='\u{feff}', + '\u{ff07}'..='\u{ff07}', '\u{ff0e}'..='\u{ff0e}', '\u{ff1a}'..='\u{ff1a}', + '\u{ff3e}'..='\u{ff3e}', '\u{ff40}'..='\u{ff40}', '\u{ff70}'..='\u{ff70}', + '\u{ff9e}'..='\u{ff9f}', '\u{ffe3}'..='\u{ffe3}', '\u{fff9}'..='\u{fffb}', + '\u{101fd}'..='\u{101fd}', '\u{102e0}'..='\u{102e0}', '\u{10376}'..='\u{1037a}', + '\u{10780}'..='\u{10785}', '\u{10787}'..='\u{107b0}', '\u{107b2}'..='\u{107ba}', + '\u{10a01}'..='\u{10a03}', '\u{10a05}'..='\u{10a06}', '\u{10a0c}'..='\u{10a0f}', + '\u{10a38}'..='\u{10a3a}', '\u{10a3f}'..='\u{10a3f}', '\u{10ae5}'..='\u{10ae6}', + '\u{10d24}'..='\u{10d27}', '\u{10d4e}'..='\u{10d4e}', '\u{10d69}'..='\u{10d6d}', + '\u{10d6f}'..='\u{10d6f}', '\u{10eab}'..='\u{10eac}', '\u{10ec5}'..='\u{10ec5}', + '\u{10efa}'..='\u{10eff}', '\u{10f46}'..='\u{10f50}', '\u{10f82}'..='\u{10f85}', + '\u{11001}'..='\u{11001}', '\u{11038}'..='\u{11046}', '\u{11070}'..='\u{11070}', + '\u{11073}'..='\u{11074}', '\u{1107f}'..='\u{11081}', '\u{110b3}'..='\u{110b6}', + '\u{110b9}'..='\u{110ba}', '\u{110bd}'..='\u{110bd}', '\u{110c2}'..='\u{110c2}', + '\u{110cd}'..='\u{110cd}', '\u{11100}'..='\u{11102}', '\u{11127}'..='\u{1112b}', + '\u{1112d}'..='\u{11134}', '\u{11173}'..='\u{11173}', '\u{11180}'..='\u{11181}', + '\u{111b6}'..='\u{111be}', '\u{111c9}'..='\u{111cc}', '\u{111cf}'..='\u{111cf}', + '\u{1122f}'..='\u{11231}', '\u{11234}'..='\u{11234}', '\u{11236}'..='\u{11237}', + '\u{1123e}'..='\u{1123e}', '\u{11241}'..='\u{11241}', '\u{112df}'..='\u{112df}', + '\u{112e3}'..='\u{112ea}', '\u{11300}'..='\u{11301}', '\u{1133b}'..='\u{1133c}', + 
'\u{11340}'..='\u{11340}', '\u{11366}'..='\u{1136c}', '\u{11370}'..='\u{11374}', + '\u{113bb}'..='\u{113c0}', '\u{113ce}'..='\u{113ce}', '\u{113d0}'..='\u{113d0}', + '\u{113d2}'..='\u{113d2}', '\u{113e1}'..='\u{113e2}', '\u{11438}'..='\u{1143f}', + '\u{11442}'..='\u{11444}', '\u{11446}'..='\u{11446}', '\u{1145e}'..='\u{1145e}', + '\u{114b3}'..='\u{114b8}', '\u{114ba}'..='\u{114ba}', '\u{114bf}'..='\u{114c0}', + '\u{114c2}'..='\u{114c3}', '\u{115b2}'..='\u{115b5}', '\u{115bc}'..='\u{115bd}', + '\u{115bf}'..='\u{115c0}', '\u{115dc}'..='\u{115dd}', '\u{11633}'..='\u{1163a}', + '\u{1163d}'..='\u{1163d}', '\u{1163f}'..='\u{11640}', '\u{116ab}'..='\u{116ab}', + '\u{116ad}'..='\u{116ad}', '\u{116b0}'..='\u{116b5}', '\u{116b7}'..='\u{116b7}', + '\u{1171d}'..='\u{1171d}', '\u{1171f}'..='\u{1171f}', '\u{11722}'..='\u{11725}', + '\u{11727}'..='\u{1172b}', '\u{1182f}'..='\u{11837}', '\u{11839}'..='\u{1183a}', + '\u{1193b}'..='\u{1193c}', '\u{1193e}'..='\u{1193e}', '\u{11943}'..='\u{11943}', + '\u{119d4}'..='\u{119d7}', '\u{119da}'..='\u{119db}', '\u{119e0}'..='\u{119e0}', + '\u{11a01}'..='\u{11a0a}', '\u{11a33}'..='\u{11a38}', '\u{11a3b}'..='\u{11a3e}', + '\u{11a47}'..='\u{11a47}', '\u{11a51}'..='\u{11a56}', '\u{11a59}'..='\u{11a5b}', + '\u{11a8a}'..='\u{11a96}', '\u{11a98}'..='\u{11a99}', '\u{11b60}'..='\u{11b60}', + '\u{11b62}'..='\u{11b64}', '\u{11b66}'..='\u{11b66}', '\u{11c30}'..='\u{11c36}', + '\u{11c38}'..='\u{11c3d}', '\u{11c3f}'..='\u{11c3f}', '\u{11c92}'..='\u{11ca7}', + '\u{11caa}'..='\u{11cb0}', '\u{11cb2}'..='\u{11cb3}', '\u{11cb5}'..='\u{11cb6}', + '\u{11d31}'..='\u{11d36}', '\u{11d3a}'..='\u{11d3a}', '\u{11d3c}'..='\u{11d3d}', + '\u{11d3f}'..='\u{11d45}', '\u{11d47}'..='\u{11d47}', '\u{11d90}'..='\u{11d91}', + '\u{11d95}'..='\u{11d95}', '\u{11d97}'..='\u{11d97}', '\u{11dd9}'..='\u{11dd9}', + '\u{11ef3}'..='\u{11ef4}', '\u{11f00}'..='\u{11f01}', '\u{11f36}'..='\u{11f3a}', + '\u{11f40}'..='\u{11f40}', '\u{11f42}'..='\u{11f42}', '\u{11f5a}'..='\u{11f5a}', + '\u{13430}'..='\u{13440}', '\u{13447}'..='\u{13455}', '\u{1611e}'..='\u{16129}', + '\u{1612d}'..='\u{1612f}', '\u{16af0}'..='\u{16af4}', '\u{16b30}'..='\u{16b36}', + '\u{16b40}'..='\u{16b43}', '\u{16d40}'..='\u{16d42}', '\u{16d6b}'..='\u{16d6c}', + '\u{16f4f}'..='\u{16f4f}', '\u{16f8f}'..='\u{16f9f}', '\u{16fe0}'..='\u{16fe1}', + '\u{16fe3}'..='\u{16fe4}', '\u{16ff2}'..='\u{16ff3}', '\u{1aff0}'..='\u{1aff3}', + '\u{1aff5}'..='\u{1affb}', '\u{1affd}'..='\u{1affe}', '\u{1bc9d}'..='\u{1bc9e}', + '\u{1bca0}'..='\u{1bca3}', '\u{1cf00}'..='\u{1cf2d}', '\u{1cf30}'..='\u{1cf46}', + '\u{1d167}'..='\u{1d169}', '\u{1d173}'..='\u{1d182}', '\u{1d185}'..='\u{1d18b}', + '\u{1d1aa}'..='\u{1d1ad}', '\u{1d242}'..='\u{1d244}', '\u{1da00}'..='\u{1da36}', + '\u{1da3b}'..='\u{1da6c}', '\u{1da75}'..='\u{1da75}', '\u{1da84}'..='\u{1da84}', + '\u{1da9b}'..='\u{1da9f}', '\u{1daa1}'..='\u{1daaf}', '\u{1e000}'..='\u{1e006}', + '\u{1e008}'..='\u{1e018}', '\u{1e01b}'..='\u{1e021}', '\u{1e023}'..='\u{1e024}', + '\u{1e026}'..='\u{1e02a}', '\u{1e030}'..='\u{1e06d}', '\u{1e08f}'..='\u{1e08f}', + '\u{1e130}'..='\u{1e13d}', '\u{1e2ae}'..='\u{1e2ae}', '\u{1e2ec}'..='\u{1e2ef}', + '\u{1e4eb}'..='\u{1e4ef}', '\u{1e5ee}'..='\u{1e5ef}', '\u{1e6e3}'..='\u{1e6e3}', + '\u{1e6e6}'..='\u{1e6e6}', '\u{1e6ee}'..='\u{1e6ef}', '\u{1e6f5}'..='\u{1e6f5}', + '\u{1e6ff}'..='\u{1e6ff}', '\u{1e8d0}'..='\u{1e8d6}', '\u{1e944}'..='\u{1e94b}', + '\u{1f3fb}'..='\u{1f3ff}', '\u{e0001}'..='\u{e0001}', '\u{e0020}'..='\u{e007f}', + '\u{e0100}'..='\u{e01ef}', +]; + +#[rustfmt::skip] +pub(super) 
static CASED: &[RangeInclusive<char>; 156] = &[
+ '\u{aa}'..='\u{aa}', '\u{b5}'..='\u{b5}', '\u{ba}'..='\u{ba}', '\u{c0}'..='\u{d6}',
+ '\u{d8}'..='\u{f6}', '\u{f8}'..='\u{1ba}', '\u{1bc}'..='\u{1bf}', '\u{1c4}'..='\u{293}',
+ '\u{296}'..='\u{2b8}', '\u{2c0}'..='\u{2c1}', '\u{2e0}'..='\u{2e4}', '\u{345}'..='\u{345}',
+ '\u{370}'..='\u{373}', '\u{376}'..='\u{377}', '\u{37a}'..='\u{37d}', '\u{37f}'..='\u{37f}',
+ '\u{386}'..='\u{386}', '\u{388}'..='\u{38a}', '\u{38c}'..='\u{38c}', '\u{38e}'..='\u{3a1}',
+ '\u{3a3}'..='\u{3f5}', '\u{3f7}'..='\u{481}', '\u{48a}'..='\u{52f}', '\u{531}'..='\u{556}',
+ '\u{560}'..='\u{588}', '\u{10a0}'..='\u{10c5}', '\u{10c7}'..='\u{10c7}',
+ '\u{10cd}'..='\u{10cd}', '\u{10d0}'..='\u{10fa}', '\u{10fc}'..='\u{10ff}',
+ '\u{13a0}'..='\u{13f5}', '\u{13f8}'..='\u{13fd}', '\u{1c80}'..='\u{1c8a}',
+ '\u{1c90}'..='\u{1cba}', '\u{1cbd}'..='\u{1cbf}', '\u{1d00}'..='\u{1dbf}',
+ '\u{1e00}'..='\u{1f15}', '\u{1f18}'..='\u{1f1d}', '\u{1f20}'..='\u{1f45}',
+ '\u{1f48}'..='\u{1f4d}', '\u{1f50}'..='\u{1f57}', '\u{1f59}'..='\u{1f59}',
+ '\u{1f5b}'..='\u{1f5b}', '\u{1f5d}'..='\u{1f5d}', '\u{1f5f}'..='\u{1f7d}',
+ '\u{1f80}'..='\u{1fb4}', '\u{1fb6}'..='\u{1fbc}', '\u{1fbe}'..='\u{1fbe}',
+ '\u{1fc2}'..='\u{1fc4}', '\u{1fc6}'..='\u{1fcc}', '\u{1fd0}'..='\u{1fd3}',
+ '\u{1fd6}'..='\u{1fdb}', '\u{1fe0}'..='\u{1fec}', '\u{1ff2}'..='\u{1ff4}',
+ '\u{1ff6}'..='\u{1ffc}', '\u{2071}'..='\u{2071}', '\u{207f}'..='\u{207f}',
+ '\u{2090}'..='\u{209c}', '\u{2102}'..='\u{2102}', '\u{2107}'..='\u{2107}',
+ '\u{210a}'..='\u{2113}', '\u{2115}'..='\u{2115}', '\u{2119}'..='\u{211d}',
+ '\u{2124}'..='\u{2124}', '\u{2126}'..='\u{2126}', '\u{2128}'..='\u{2128}',
+ '\u{212a}'..='\u{212d}', '\u{212f}'..='\u{2134}', '\u{2139}'..='\u{2139}',
+ '\u{213c}'..='\u{213f}', '\u{2145}'..='\u{2149}', '\u{214e}'..='\u{214e}',
+ '\u{2160}'..='\u{217f}', '\u{2183}'..='\u{2184}', '\u{24b6}'..='\u{24e9}',
+ '\u{2c00}'..='\u{2ce4}', '\u{2ceb}'..='\u{2cee}', '\u{2cf2}'..='\u{2cf3}',
+ '\u{2d00}'..='\u{2d25}', '\u{2d27}'..='\u{2d27}', '\u{2d2d}'..='\u{2d2d}',
+ '\u{a640}'..='\u{a66d}', '\u{a680}'..='\u{a69d}', '\u{a722}'..='\u{a787}',
+ '\u{a78b}'..='\u{a78e}', '\u{a790}'..='\u{a7dc}', '\u{a7f1}'..='\u{a7f6}',
+ '\u{a7f8}'..='\u{a7fa}', '\u{ab30}'..='\u{ab5a}', '\u{ab5c}'..='\u{ab69}',
+ '\u{ab70}'..='\u{abbf}', '\u{fb00}'..='\u{fb06}', '\u{fb13}'..='\u{fb17}',
+ '\u{ff21}'..='\u{ff3a}', '\u{ff41}'..='\u{ff5a}', '\u{10400}'..='\u{1044f}',
+ '\u{104b0}'..='\u{104d3}', '\u{104d8}'..='\u{104fb}', '\u{10570}'..='\u{1057a}',
+ '\u{1057c}'..='\u{1058a}', '\u{1058c}'..='\u{10592}', '\u{10594}'..='\u{10595}',
+ '\u{10597}'..='\u{105a1}', '\u{105a3}'..='\u{105b1}', '\u{105b3}'..='\u{105b9}',
+ '\u{105bb}'..='\u{105bc}', '\u{10780}'..='\u{10780}', '\u{10783}'..='\u{10785}',
+ '\u{10787}'..='\u{107b0}', '\u{107b2}'..='\u{107ba}', '\u{10c80}'..='\u{10cb2}',
+ '\u{10cc0}'..='\u{10cf2}', '\u{10d50}'..='\u{10d65}', '\u{10d70}'..='\u{10d85}',
+ '\u{118a0}'..='\u{118df}', '\u{16e40}'..='\u{16e7f}', '\u{16ea0}'..='\u{16eb8}',
+ '\u{16ebb}'..='\u{16ed3}', '\u{1d400}'..='\u{1d454}', '\u{1d456}'..='\u{1d49c}',
+ '\u{1d49e}'..='\u{1d49f}', '\u{1d4a2}'..='\u{1d4a2}', '\u{1d4a5}'..='\u{1d4a6}',
+ '\u{1d4a9}'..='\u{1d4ac}', '\u{1d4ae}'..='\u{1d4b9}', '\u{1d4bb}'..='\u{1d4bb}',
+ '\u{1d4bd}'..='\u{1d4c3}', '\u{1d4c5}'..='\u{1d505}', '\u{1d507}'..='\u{1d50a}',
+ '\u{1d50d}'..='\u{1d514}', '\u{1d516}'..='\u{1d51c}', '\u{1d51e}'..='\u{1d539}',
+ '\u{1d53b}'..='\u{1d53e}', '\u{1d540}'..='\u{1d544}', '\u{1d546}'..='\u{1d546}',
+ '\u{1d54a}'..='\u{1d550}',
'\u{1d552}'..='\u{1d6a5}', '\u{1d6a8}'..='\u{1d6c0}',
+ '\u{1d6c2}'..='\u{1d6da}', '\u{1d6dc}'..='\u{1d6fa}', '\u{1d6fc}'..='\u{1d714}',
+ '\u{1d716}'..='\u{1d734}', '\u{1d736}'..='\u{1d74e}', '\u{1d750}'..='\u{1d76e}',
+ '\u{1d770}'..='\u{1d788}', '\u{1d78a}'..='\u{1d7a8}', '\u{1d7aa}'..='\u{1d7c2}',
+ '\u{1d7c4}'..='\u{1d7cb}', '\u{1df00}'..='\u{1df09}', '\u{1df0b}'..='\u{1df1e}',
+ '\u{1df25}'..='\u{1df2a}', '\u{1e030}'..='\u{1e06d}', '\u{1e900}'..='\u{1e943}',
+ '\u{1f130}'..='\u{1f149}', '\u{1f150}'..='\u{1f169}', '\u{1f170}'..='\u{1f189}',
+];
+
+#[rustfmt::skip]
+pub(super) static GRAPHEME_EXTEND: &[RangeInclusive<char>; 383] = &[
+ '\u{300}'..='\u{36f}', '\u{483}'..='\u{489}', '\u{591}'..='\u{5bd}', '\u{5bf}'..='\u{5bf}',
+ '\u{5c1}'..='\u{5c2}', '\u{5c4}'..='\u{5c5}', '\u{5c7}'..='\u{5c7}', '\u{610}'..='\u{61a}',
+ '\u{64b}'..='\u{65f}', '\u{670}'..='\u{670}', '\u{6d6}'..='\u{6dc}', '\u{6df}'..='\u{6e4}',
+ '\u{6e7}'..='\u{6e8}', '\u{6ea}'..='\u{6ed}', '\u{711}'..='\u{711}', '\u{730}'..='\u{74a}',
+ '\u{7a6}'..='\u{7b0}', '\u{7eb}'..='\u{7f3}', '\u{7fd}'..='\u{7fd}', '\u{816}'..='\u{819}',
+ '\u{81b}'..='\u{823}', '\u{825}'..='\u{827}', '\u{829}'..='\u{82d}', '\u{859}'..='\u{85b}',
+ '\u{897}'..='\u{89f}', '\u{8ca}'..='\u{8e1}', '\u{8e3}'..='\u{902}', '\u{93a}'..='\u{93a}',
+ '\u{93c}'..='\u{93c}', '\u{941}'..='\u{948}', '\u{94d}'..='\u{94d}', '\u{951}'..='\u{957}',
+ '\u{962}'..='\u{963}', '\u{981}'..='\u{981}', '\u{9bc}'..='\u{9bc}', '\u{9be}'..='\u{9be}',
+ '\u{9c1}'..='\u{9c4}', '\u{9cd}'..='\u{9cd}', '\u{9d7}'..='\u{9d7}', '\u{9e2}'..='\u{9e3}',
+ '\u{9fe}'..='\u{9fe}', '\u{a01}'..='\u{a02}', '\u{a3c}'..='\u{a3c}', '\u{a41}'..='\u{a42}',
+ '\u{a47}'..='\u{a48}', '\u{a4b}'..='\u{a4d}', '\u{a51}'..='\u{a51}', '\u{a70}'..='\u{a71}',
+ '\u{a75}'..='\u{a75}', '\u{a81}'..='\u{a82}', '\u{abc}'..='\u{abc}', '\u{ac1}'..='\u{ac5}',
+ '\u{ac7}'..='\u{ac8}', '\u{acd}'..='\u{acd}', '\u{ae2}'..='\u{ae3}', '\u{afa}'..='\u{aff}',
+ '\u{b01}'..='\u{b01}', '\u{b3c}'..='\u{b3c}', '\u{b3e}'..='\u{b3f}', '\u{b41}'..='\u{b44}',
+ '\u{b4d}'..='\u{b4d}', '\u{b55}'..='\u{b57}', '\u{b62}'..='\u{b63}', '\u{b82}'..='\u{b82}',
+ '\u{bbe}'..='\u{bbe}', '\u{bc0}'..='\u{bc0}', '\u{bcd}'..='\u{bcd}', '\u{bd7}'..='\u{bd7}',
+ '\u{c00}'..='\u{c00}', '\u{c04}'..='\u{c04}', '\u{c3c}'..='\u{c3c}', '\u{c3e}'..='\u{c40}',
+ '\u{c46}'..='\u{c48}', '\u{c4a}'..='\u{c4d}', '\u{c55}'..='\u{c56}', '\u{c62}'..='\u{c63}',
+ '\u{c81}'..='\u{c81}', '\u{cbc}'..='\u{cbc}', '\u{cbf}'..='\u{cc0}', '\u{cc2}'..='\u{cc2}',
+ '\u{cc6}'..='\u{cc8}', '\u{cca}'..='\u{ccd}', '\u{cd5}'..='\u{cd6}', '\u{ce2}'..='\u{ce3}',
+ '\u{d00}'..='\u{d01}', '\u{d3b}'..='\u{d3c}', '\u{d3e}'..='\u{d3e}', '\u{d41}'..='\u{d44}',
+ '\u{d4d}'..='\u{d4d}', '\u{d57}'..='\u{d57}', '\u{d62}'..='\u{d63}', '\u{d81}'..='\u{d81}',
+ '\u{dca}'..='\u{dca}', '\u{dcf}'..='\u{dcf}', '\u{dd2}'..='\u{dd4}', '\u{dd6}'..='\u{dd6}',
+ '\u{ddf}'..='\u{ddf}', '\u{e31}'..='\u{e31}', '\u{e34}'..='\u{e3a}', '\u{e47}'..='\u{e4e}',
+ '\u{eb1}'..='\u{eb1}', '\u{eb4}'..='\u{ebc}', '\u{ec8}'..='\u{ece}', '\u{f18}'..='\u{f19}',
+ '\u{f35}'..='\u{f35}', '\u{f37}'..='\u{f37}', '\u{f39}'..='\u{f39}', '\u{f71}'..='\u{f7e}',
+ '\u{f80}'..='\u{f84}', '\u{f86}'..='\u{f87}', '\u{f8d}'..='\u{f97}', '\u{f99}'..='\u{fbc}',
+ '\u{fc6}'..='\u{fc6}', '\u{102d}'..='\u{1030}', '\u{1032}'..='\u{1037}',
+ '\u{1039}'..='\u{103a}', '\u{103d}'..='\u{103e}', '\u{1058}'..='\u{1059}',
+ '\u{105e}'..='\u{1060}', '\u{1071}'..='\u{1074}', '\u{1082}'..='\u{1082}',
+ '\u{1085}'..='\u{1086}', '\u{108d}'..='\u{108d}',
'\u{109d}'..='\u{109d}', + '\u{135d}'..='\u{135f}', '\u{1712}'..='\u{1715}', '\u{1732}'..='\u{1734}', + '\u{1752}'..='\u{1753}', '\u{1772}'..='\u{1773}', '\u{17b4}'..='\u{17b5}', + '\u{17b7}'..='\u{17bd}', '\u{17c6}'..='\u{17c6}', '\u{17c9}'..='\u{17d3}', + '\u{17dd}'..='\u{17dd}', '\u{180b}'..='\u{180d}', '\u{180f}'..='\u{180f}', + '\u{1885}'..='\u{1886}', '\u{18a9}'..='\u{18a9}', '\u{1920}'..='\u{1922}', + '\u{1927}'..='\u{1928}', '\u{1932}'..='\u{1932}', '\u{1939}'..='\u{193b}', + '\u{1a17}'..='\u{1a18}', '\u{1a1b}'..='\u{1a1b}', '\u{1a56}'..='\u{1a56}', + '\u{1a58}'..='\u{1a5e}', '\u{1a60}'..='\u{1a60}', '\u{1a62}'..='\u{1a62}', + '\u{1a65}'..='\u{1a6c}', '\u{1a73}'..='\u{1a7c}', '\u{1a7f}'..='\u{1a7f}', + '\u{1ab0}'..='\u{1add}', '\u{1ae0}'..='\u{1aeb}', '\u{1b00}'..='\u{1b03}', + '\u{1b34}'..='\u{1b3d}', '\u{1b42}'..='\u{1b44}', '\u{1b6b}'..='\u{1b73}', + '\u{1b80}'..='\u{1b81}', '\u{1ba2}'..='\u{1ba5}', '\u{1ba8}'..='\u{1bad}', + '\u{1be6}'..='\u{1be6}', '\u{1be8}'..='\u{1be9}', '\u{1bed}'..='\u{1bed}', + '\u{1bef}'..='\u{1bf3}', '\u{1c2c}'..='\u{1c33}', '\u{1c36}'..='\u{1c37}', + '\u{1cd0}'..='\u{1cd2}', '\u{1cd4}'..='\u{1ce0}', '\u{1ce2}'..='\u{1ce8}', + '\u{1ced}'..='\u{1ced}', '\u{1cf4}'..='\u{1cf4}', '\u{1cf8}'..='\u{1cf9}', + '\u{1dc0}'..='\u{1dff}', '\u{200c}'..='\u{200c}', '\u{20d0}'..='\u{20f0}', + '\u{2cef}'..='\u{2cf1}', '\u{2d7f}'..='\u{2d7f}', '\u{2de0}'..='\u{2dff}', + '\u{302a}'..='\u{302f}', '\u{3099}'..='\u{309a}', '\u{a66f}'..='\u{a672}', + '\u{a674}'..='\u{a67d}', '\u{a69e}'..='\u{a69f}', '\u{a6f0}'..='\u{a6f1}', + '\u{a802}'..='\u{a802}', '\u{a806}'..='\u{a806}', '\u{a80b}'..='\u{a80b}', + '\u{a825}'..='\u{a826}', '\u{a82c}'..='\u{a82c}', '\u{a8c4}'..='\u{a8c5}', + '\u{a8e0}'..='\u{a8f1}', '\u{a8ff}'..='\u{a8ff}', '\u{a926}'..='\u{a92d}', + '\u{a947}'..='\u{a951}', '\u{a953}'..='\u{a953}', '\u{a980}'..='\u{a982}', + '\u{a9b3}'..='\u{a9b3}', '\u{a9b6}'..='\u{a9b9}', '\u{a9bc}'..='\u{a9bd}', + '\u{a9c0}'..='\u{a9c0}', '\u{a9e5}'..='\u{a9e5}', '\u{aa29}'..='\u{aa2e}', + '\u{aa31}'..='\u{aa32}', '\u{aa35}'..='\u{aa36}', '\u{aa43}'..='\u{aa43}', + '\u{aa4c}'..='\u{aa4c}', '\u{aa7c}'..='\u{aa7c}', '\u{aab0}'..='\u{aab0}', + '\u{aab2}'..='\u{aab4}', '\u{aab7}'..='\u{aab8}', '\u{aabe}'..='\u{aabf}', + '\u{aac1}'..='\u{aac1}', '\u{aaec}'..='\u{aaed}', '\u{aaf6}'..='\u{aaf6}', + '\u{abe5}'..='\u{abe5}', '\u{abe8}'..='\u{abe8}', '\u{abed}'..='\u{abed}', + '\u{fb1e}'..='\u{fb1e}', '\u{fe00}'..='\u{fe0f}', '\u{fe20}'..='\u{fe2f}', + '\u{ff9e}'..='\u{ff9f}', '\u{101fd}'..='\u{101fd}', '\u{102e0}'..='\u{102e0}', + '\u{10376}'..='\u{1037a}', '\u{10a01}'..='\u{10a03}', '\u{10a05}'..='\u{10a06}', + '\u{10a0c}'..='\u{10a0f}', '\u{10a38}'..='\u{10a3a}', '\u{10a3f}'..='\u{10a3f}', + '\u{10ae5}'..='\u{10ae6}', '\u{10d24}'..='\u{10d27}', '\u{10d69}'..='\u{10d6d}', + '\u{10eab}'..='\u{10eac}', '\u{10efa}'..='\u{10eff}', '\u{10f46}'..='\u{10f50}', + '\u{10f82}'..='\u{10f85}', '\u{11001}'..='\u{11001}', '\u{11038}'..='\u{11046}', + '\u{11070}'..='\u{11070}', '\u{11073}'..='\u{11074}', '\u{1107f}'..='\u{11081}', + '\u{110b3}'..='\u{110b6}', '\u{110b9}'..='\u{110ba}', '\u{110c2}'..='\u{110c2}', + '\u{11100}'..='\u{11102}', '\u{11127}'..='\u{1112b}', '\u{1112d}'..='\u{11134}', + '\u{11173}'..='\u{11173}', '\u{11180}'..='\u{11181}', '\u{111b6}'..='\u{111be}', + '\u{111c0}'..='\u{111c0}', '\u{111c9}'..='\u{111cc}', '\u{111cf}'..='\u{111cf}', + '\u{1122f}'..='\u{11231}', '\u{11234}'..='\u{11237}', '\u{1123e}'..='\u{1123e}', + '\u{11241}'..='\u{11241}', '\u{112df}'..='\u{112df}', 
'\u{112e3}'..='\u{112ea}',
+ '\u{11300}'..='\u{11301}', '\u{1133b}'..='\u{1133c}', '\u{1133e}'..='\u{1133e}',
+ '\u{11340}'..='\u{11340}', '\u{1134d}'..='\u{1134d}', '\u{11357}'..='\u{11357}',
+ '\u{11366}'..='\u{1136c}', '\u{11370}'..='\u{11374}', '\u{113b8}'..='\u{113b8}',
+ '\u{113bb}'..='\u{113c0}', '\u{113c2}'..='\u{113c2}', '\u{113c5}'..='\u{113c5}',
+ '\u{113c7}'..='\u{113c9}', '\u{113ce}'..='\u{113d0}', '\u{113d2}'..='\u{113d2}',
+ '\u{113e1}'..='\u{113e2}', '\u{11438}'..='\u{1143f}', '\u{11442}'..='\u{11444}',
+ '\u{11446}'..='\u{11446}', '\u{1145e}'..='\u{1145e}', '\u{114b0}'..='\u{114b0}',
+ '\u{114b3}'..='\u{114b8}', '\u{114ba}'..='\u{114ba}', '\u{114bd}'..='\u{114bd}',
+ '\u{114bf}'..='\u{114c0}', '\u{114c2}'..='\u{114c3}', '\u{115af}'..='\u{115af}',
+ '\u{115b2}'..='\u{115b5}', '\u{115bc}'..='\u{115bd}', '\u{115bf}'..='\u{115c0}',
+ '\u{115dc}'..='\u{115dd}', '\u{11633}'..='\u{1163a}', '\u{1163d}'..='\u{1163d}',
+ '\u{1163f}'..='\u{11640}', '\u{116ab}'..='\u{116ab}', '\u{116ad}'..='\u{116ad}',
+ '\u{116b0}'..='\u{116b7}', '\u{1171d}'..='\u{1171d}', '\u{1171f}'..='\u{1171f}',
+ '\u{11722}'..='\u{11725}', '\u{11727}'..='\u{1172b}', '\u{1182f}'..='\u{11837}',
+ '\u{11839}'..='\u{1183a}', '\u{11930}'..='\u{11930}', '\u{1193b}'..='\u{1193e}',
+ '\u{11943}'..='\u{11943}', '\u{119d4}'..='\u{119d7}', '\u{119da}'..='\u{119db}',
+ '\u{119e0}'..='\u{119e0}', '\u{11a01}'..='\u{11a0a}', '\u{11a33}'..='\u{11a38}',
+ '\u{11a3b}'..='\u{11a3e}', '\u{11a47}'..='\u{11a47}', '\u{11a51}'..='\u{11a56}',
+ '\u{11a59}'..='\u{11a5b}', '\u{11a8a}'..='\u{11a96}', '\u{11a98}'..='\u{11a99}',
+ '\u{11b60}'..='\u{11b60}', '\u{11b62}'..='\u{11b64}', '\u{11b66}'..='\u{11b66}',
+ '\u{11c30}'..='\u{11c36}', '\u{11c38}'..='\u{11c3d}', '\u{11c3f}'..='\u{11c3f}',
+ '\u{11c92}'..='\u{11ca7}', '\u{11caa}'..='\u{11cb0}', '\u{11cb2}'..='\u{11cb3}',
+ '\u{11cb5}'..='\u{11cb6}', '\u{11d31}'..='\u{11d36}', '\u{11d3a}'..='\u{11d3a}',
+ '\u{11d3c}'..='\u{11d3d}', '\u{11d3f}'..='\u{11d45}', '\u{11d47}'..='\u{11d47}',
+ '\u{11d90}'..='\u{11d91}', '\u{11d95}'..='\u{11d95}', '\u{11d97}'..='\u{11d97}',
+ '\u{11ef3}'..='\u{11ef4}', '\u{11f00}'..='\u{11f01}', '\u{11f36}'..='\u{11f3a}',
+ '\u{11f40}'..='\u{11f42}', '\u{11f5a}'..='\u{11f5a}', '\u{13440}'..='\u{13440}',
+ '\u{13447}'..='\u{13455}', '\u{1611e}'..='\u{16129}', '\u{1612d}'..='\u{1612f}',
+ '\u{16af0}'..='\u{16af4}', '\u{16b30}'..='\u{16b36}', '\u{16f4f}'..='\u{16f4f}',
+ '\u{16f8f}'..='\u{16f92}', '\u{16fe4}'..='\u{16fe4}', '\u{16ff0}'..='\u{16ff1}',
+ '\u{1bc9d}'..='\u{1bc9e}', '\u{1cf00}'..='\u{1cf2d}', '\u{1cf30}'..='\u{1cf46}',
+ '\u{1d165}'..='\u{1d169}', '\u{1d16d}'..='\u{1d172}', '\u{1d17b}'..='\u{1d182}',
+ '\u{1d185}'..='\u{1d18b}', '\u{1d1aa}'..='\u{1d1ad}', '\u{1d242}'..='\u{1d244}',
+ '\u{1da00}'..='\u{1da36}', '\u{1da3b}'..='\u{1da6c}', '\u{1da75}'..='\u{1da75}',
+ '\u{1da84}'..='\u{1da84}', '\u{1da9b}'..='\u{1da9f}', '\u{1daa1}'..='\u{1daaf}',
+ '\u{1e000}'..='\u{1e006}', '\u{1e008}'..='\u{1e018}', '\u{1e01b}'..='\u{1e021}',
+ '\u{1e023}'..='\u{1e024}', '\u{1e026}'..='\u{1e02a}', '\u{1e08f}'..='\u{1e08f}',
+ '\u{1e130}'..='\u{1e136}', '\u{1e2ae}'..='\u{1e2ae}', '\u{1e2ec}'..='\u{1e2ef}',
+ '\u{1e4ec}'..='\u{1e4ef}', '\u{1e5ee}'..='\u{1e5ef}', '\u{1e6e3}'..='\u{1e6e3}',
+ '\u{1e6e6}'..='\u{1e6e6}', '\u{1e6ee}'..='\u{1e6ef}', '\u{1e6f5}'..='\u{1e6f5}',
+ '\u{1e8d0}'..='\u{1e8d6}', '\u{1e944}'..='\u{1e94a}', '\u{e0020}'..='\u{e007f}',
+ '\u{e0100}'..='\u{e01ef}',
+];
+
+#[rustfmt::skip]
+pub(super) static LOWERCASE: &[RangeInclusive<char>; 676] = &[
+
'\u{aa}'..='\u{aa}', '\u{b5}'..='\u{b5}', '\u{ba}'..='\u{ba}', '\u{df}'..='\u{f6}', + '\u{f8}'..='\u{ff}', '\u{101}'..='\u{101}', '\u{103}'..='\u{103}', '\u{105}'..='\u{105}', + '\u{107}'..='\u{107}', '\u{109}'..='\u{109}', '\u{10b}'..='\u{10b}', '\u{10d}'..='\u{10d}', + '\u{10f}'..='\u{10f}', '\u{111}'..='\u{111}', '\u{113}'..='\u{113}', '\u{115}'..='\u{115}', + '\u{117}'..='\u{117}', '\u{119}'..='\u{119}', '\u{11b}'..='\u{11b}', '\u{11d}'..='\u{11d}', + '\u{11f}'..='\u{11f}', '\u{121}'..='\u{121}', '\u{123}'..='\u{123}', '\u{125}'..='\u{125}', + '\u{127}'..='\u{127}', '\u{129}'..='\u{129}', '\u{12b}'..='\u{12b}', '\u{12d}'..='\u{12d}', + '\u{12f}'..='\u{12f}', '\u{131}'..='\u{131}', '\u{133}'..='\u{133}', '\u{135}'..='\u{135}', + '\u{137}'..='\u{138}', '\u{13a}'..='\u{13a}', '\u{13c}'..='\u{13c}', '\u{13e}'..='\u{13e}', + '\u{140}'..='\u{140}', '\u{142}'..='\u{142}', '\u{144}'..='\u{144}', '\u{146}'..='\u{146}', + '\u{148}'..='\u{149}', '\u{14b}'..='\u{14b}', '\u{14d}'..='\u{14d}', '\u{14f}'..='\u{14f}', + '\u{151}'..='\u{151}', '\u{153}'..='\u{153}', '\u{155}'..='\u{155}', '\u{157}'..='\u{157}', + '\u{159}'..='\u{159}', '\u{15b}'..='\u{15b}', '\u{15d}'..='\u{15d}', '\u{15f}'..='\u{15f}', + '\u{161}'..='\u{161}', '\u{163}'..='\u{163}', '\u{165}'..='\u{165}', '\u{167}'..='\u{167}', + '\u{169}'..='\u{169}', '\u{16b}'..='\u{16b}', '\u{16d}'..='\u{16d}', '\u{16f}'..='\u{16f}', + '\u{171}'..='\u{171}', '\u{173}'..='\u{173}', '\u{175}'..='\u{175}', '\u{177}'..='\u{177}', + '\u{17a}'..='\u{17a}', '\u{17c}'..='\u{17c}', '\u{17e}'..='\u{180}', '\u{183}'..='\u{183}', + '\u{185}'..='\u{185}', '\u{188}'..='\u{188}', '\u{18c}'..='\u{18d}', '\u{192}'..='\u{192}', + '\u{195}'..='\u{195}', '\u{199}'..='\u{19b}', '\u{19e}'..='\u{19e}', '\u{1a1}'..='\u{1a1}', + '\u{1a3}'..='\u{1a3}', '\u{1a5}'..='\u{1a5}', '\u{1a8}'..='\u{1a8}', '\u{1aa}'..='\u{1ab}', + '\u{1ad}'..='\u{1ad}', '\u{1b0}'..='\u{1b0}', '\u{1b4}'..='\u{1b4}', '\u{1b6}'..='\u{1b6}', + '\u{1b9}'..='\u{1ba}', '\u{1bd}'..='\u{1bf}', '\u{1c6}'..='\u{1c6}', '\u{1c9}'..='\u{1c9}', + '\u{1cc}'..='\u{1cc}', '\u{1ce}'..='\u{1ce}', '\u{1d0}'..='\u{1d0}', '\u{1d2}'..='\u{1d2}', + '\u{1d4}'..='\u{1d4}', '\u{1d6}'..='\u{1d6}', '\u{1d8}'..='\u{1d8}', '\u{1da}'..='\u{1da}', + '\u{1dc}'..='\u{1dd}', '\u{1df}'..='\u{1df}', '\u{1e1}'..='\u{1e1}', '\u{1e3}'..='\u{1e3}', + '\u{1e5}'..='\u{1e5}', '\u{1e7}'..='\u{1e7}', '\u{1e9}'..='\u{1e9}', '\u{1eb}'..='\u{1eb}', + '\u{1ed}'..='\u{1ed}', '\u{1ef}'..='\u{1f0}', '\u{1f3}'..='\u{1f3}', '\u{1f5}'..='\u{1f5}', + '\u{1f9}'..='\u{1f9}', '\u{1fb}'..='\u{1fb}', '\u{1fd}'..='\u{1fd}', '\u{1ff}'..='\u{1ff}', + '\u{201}'..='\u{201}', '\u{203}'..='\u{203}', '\u{205}'..='\u{205}', '\u{207}'..='\u{207}', + '\u{209}'..='\u{209}', '\u{20b}'..='\u{20b}', '\u{20d}'..='\u{20d}', '\u{20f}'..='\u{20f}', + '\u{211}'..='\u{211}', '\u{213}'..='\u{213}', '\u{215}'..='\u{215}', '\u{217}'..='\u{217}', + '\u{219}'..='\u{219}', '\u{21b}'..='\u{21b}', '\u{21d}'..='\u{21d}', '\u{21f}'..='\u{21f}', + '\u{221}'..='\u{221}', '\u{223}'..='\u{223}', '\u{225}'..='\u{225}', '\u{227}'..='\u{227}', + '\u{229}'..='\u{229}', '\u{22b}'..='\u{22b}', '\u{22d}'..='\u{22d}', '\u{22f}'..='\u{22f}', + '\u{231}'..='\u{231}', '\u{233}'..='\u{239}', '\u{23c}'..='\u{23c}', '\u{23f}'..='\u{240}', + '\u{242}'..='\u{242}', '\u{247}'..='\u{247}', '\u{249}'..='\u{249}', '\u{24b}'..='\u{24b}', + '\u{24d}'..='\u{24d}', '\u{24f}'..='\u{293}', '\u{296}'..='\u{2b8}', '\u{2c0}'..='\u{2c1}', + '\u{2e0}'..='\u{2e4}', '\u{345}'..='\u{345}', '\u{371}'..='\u{371}', 
'\u{373}'..='\u{373}', + '\u{377}'..='\u{377}', '\u{37a}'..='\u{37d}', '\u{390}'..='\u{390}', '\u{3ac}'..='\u{3ce}', + '\u{3d0}'..='\u{3d1}', '\u{3d5}'..='\u{3d7}', '\u{3d9}'..='\u{3d9}', '\u{3db}'..='\u{3db}', + '\u{3dd}'..='\u{3dd}', '\u{3df}'..='\u{3df}', '\u{3e1}'..='\u{3e1}', '\u{3e3}'..='\u{3e3}', + '\u{3e5}'..='\u{3e5}', '\u{3e7}'..='\u{3e7}', '\u{3e9}'..='\u{3e9}', '\u{3eb}'..='\u{3eb}', + '\u{3ed}'..='\u{3ed}', '\u{3ef}'..='\u{3f3}', '\u{3f5}'..='\u{3f5}', '\u{3f8}'..='\u{3f8}', + '\u{3fb}'..='\u{3fc}', '\u{430}'..='\u{45f}', '\u{461}'..='\u{461}', '\u{463}'..='\u{463}', + '\u{465}'..='\u{465}', '\u{467}'..='\u{467}', '\u{469}'..='\u{469}', '\u{46b}'..='\u{46b}', + '\u{46d}'..='\u{46d}', '\u{46f}'..='\u{46f}', '\u{471}'..='\u{471}', '\u{473}'..='\u{473}', + '\u{475}'..='\u{475}', '\u{477}'..='\u{477}', '\u{479}'..='\u{479}', '\u{47b}'..='\u{47b}', + '\u{47d}'..='\u{47d}', '\u{47f}'..='\u{47f}', '\u{481}'..='\u{481}', '\u{48b}'..='\u{48b}', + '\u{48d}'..='\u{48d}', '\u{48f}'..='\u{48f}', '\u{491}'..='\u{491}', '\u{493}'..='\u{493}', + '\u{495}'..='\u{495}', '\u{497}'..='\u{497}', '\u{499}'..='\u{499}', '\u{49b}'..='\u{49b}', + '\u{49d}'..='\u{49d}', '\u{49f}'..='\u{49f}', '\u{4a1}'..='\u{4a1}', '\u{4a3}'..='\u{4a3}', + '\u{4a5}'..='\u{4a5}', '\u{4a7}'..='\u{4a7}', '\u{4a9}'..='\u{4a9}', '\u{4ab}'..='\u{4ab}', + '\u{4ad}'..='\u{4ad}', '\u{4af}'..='\u{4af}', '\u{4b1}'..='\u{4b1}', '\u{4b3}'..='\u{4b3}', + '\u{4b5}'..='\u{4b5}', '\u{4b7}'..='\u{4b7}', '\u{4b9}'..='\u{4b9}', '\u{4bb}'..='\u{4bb}', + '\u{4bd}'..='\u{4bd}', '\u{4bf}'..='\u{4bf}', '\u{4c2}'..='\u{4c2}', '\u{4c4}'..='\u{4c4}', + '\u{4c6}'..='\u{4c6}', '\u{4c8}'..='\u{4c8}', '\u{4ca}'..='\u{4ca}', '\u{4cc}'..='\u{4cc}', + '\u{4ce}'..='\u{4cf}', '\u{4d1}'..='\u{4d1}', '\u{4d3}'..='\u{4d3}', '\u{4d5}'..='\u{4d5}', + '\u{4d7}'..='\u{4d7}', '\u{4d9}'..='\u{4d9}', '\u{4db}'..='\u{4db}', '\u{4dd}'..='\u{4dd}', + '\u{4df}'..='\u{4df}', '\u{4e1}'..='\u{4e1}', '\u{4e3}'..='\u{4e3}', '\u{4e5}'..='\u{4e5}', + '\u{4e7}'..='\u{4e7}', '\u{4e9}'..='\u{4e9}', '\u{4eb}'..='\u{4eb}', '\u{4ed}'..='\u{4ed}', + '\u{4ef}'..='\u{4ef}', '\u{4f1}'..='\u{4f1}', '\u{4f3}'..='\u{4f3}', '\u{4f5}'..='\u{4f5}', + '\u{4f7}'..='\u{4f7}', '\u{4f9}'..='\u{4f9}', '\u{4fb}'..='\u{4fb}', '\u{4fd}'..='\u{4fd}', + '\u{4ff}'..='\u{4ff}', '\u{501}'..='\u{501}', '\u{503}'..='\u{503}', '\u{505}'..='\u{505}', + '\u{507}'..='\u{507}', '\u{509}'..='\u{509}', '\u{50b}'..='\u{50b}', '\u{50d}'..='\u{50d}', + '\u{50f}'..='\u{50f}', '\u{511}'..='\u{511}', '\u{513}'..='\u{513}', '\u{515}'..='\u{515}', + '\u{517}'..='\u{517}', '\u{519}'..='\u{519}', '\u{51b}'..='\u{51b}', '\u{51d}'..='\u{51d}', + '\u{51f}'..='\u{51f}', '\u{521}'..='\u{521}', '\u{523}'..='\u{523}', '\u{525}'..='\u{525}', + '\u{527}'..='\u{527}', '\u{529}'..='\u{529}', '\u{52b}'..='\u{52b}', '\u{52d}'..='\u{52d}', + '\u{52f}'..='\u{52f}', '\u{560}'..='\u{588}', '\u{10d0}'..='\u{10fa}', + '\u{10fc}'..='\u{10ff}', '\u{13f8}'..='\u{13fd}', '\u{1c80}'..='\u{1c88}', + '\u{1c8a}'..='\u{1c8a}', '\u{1d00}'..='\u{1dbf}', '\u{1e01}'..='\u{1e01}', + '\u{1e03}'..='\u{1e03}', '\u{1e05}'..='\u{1e05}', '\u{1e07}'..='\u{1e07}', + '\u{1e09}'..='\u{1e09}', '\u{1e0b}'..='\u{1e0b}', '\u{1e0d}'..='\u{1e0d}', + '\u{1e0f}'..='\u{1e0f}', '\u{1e11}'..='\u{1e11}', '\u{1e13}'..='\u{1e13}', + '\u{1e15}'..='\u{1e15}', '\u{1e17}'..='\u{1e17}', '\u{1e19}'..='\u{1e19}', + '\u{1e1b}'..='\u{1e1b}', '\u{1e1d}'..='\u{1e1d}', '\u{1e1f}'..='\u{1e1f}', + '\u{1e21}'..='\u{1e21}', '\u{1e23}'..='\u{1e23}', '\u{1e25}'..='\u{1e25}', + 
'\u{1e27}'..='\u{1e27}', '\u{1e29}'..='\u{1e29}', '\u{1e2b}'..='\u{1e2b}', + '\u{1e2d}'..='\u{1e2d}', '\u{1e2f}'..='\u{1e2f}', '\u{1e31}'..='\u{1e31}', + '\u{1e33}'..='\u{1e33}', '\u{1e35}'..='\u{1e35}', '\u{1e37}'..='\u{1e37}', + '\u{1e39}'..='\u{1e39}', '\u{1e3b}'..='\u{1e3b}', '\u{1e3d}'..='\u{1e3d}', + '\u{1e3f}'..='\u{1e3f}', '\u{1e41}'..='\u{1e41}', '\u{1e43}'..='\u{1e43}', + '\u{1e45}'..='\u{1e45}', '\u{1e47}'..='\u{1e47}', '\u{1e49}'..='\u{1e49}', + '\u{1e4b}'..='\u{1e4b}', '\u{1e4d}'..='\u{1e4d}', '\u{1e4f}'..='\u{1e4f}', + '\u{1e51}'..='\u{1e51}', '\u{1e53}'..='\u{1e53}', '\u{1e55}'..='\u{1e55}', + '\u{1e57}'..='\u{1e57}', '\u{1e59}'..='\u{1e59}', '\u{1e5b}'..='\u{1e5b}', + '\u{1e5d}'..='\u{1e5d}', '\u{1e5f}'..='\u{1e5f}', '\u{1e61}'..='\u{1e61}', + '\u{1e63}'..='\u{1e63}', '\u{1e65}'..='\u{1e65}', '\u{1e67}'..='\u{1e67}', + '\u{1e69}'..='\u{1e69}', '\u{1e6b}'..='\u{1e6b}', '\u{1e6d}'..='\u{1e6d}', + '\u{1e6f}'..='\u{1e6f}', '\u{1e71}'..='\u{1e71}', '\u{1e73}'..='\u{1e73}', + '\u{1e75}'..='\u{1e75}', '\u{1e77}'..='\u{1e77}', '\u{1e79}'..='\u{1e79}', + '\u{1e7b}'..='\u{1e7b}', '\u{1e7d}'..='\u{1e7d}', '\u{1e7f}'..='\u{1e7f}', + '\u{1e81}'..='\u{1e81}', '\u{1e83}'..='\u{1e83}', '\u{1e85}'..='\u{1e85}', + '\u{1e87}'..='\u{1e87}', '\u{1e89}'..='\u{1e89}', '\u{1e8b}'..='\u{1e8b}', + '\u{1e8d}'..='\u{1e8d}', '\u{1e8f}'..='\u{1e8f}', '\u{1e91}'..='\u{1e91}', + '\u{1e93}'..='\u{1e93}', '\u{1e95}'..='\u{1e9d}', '\u{1e9f}'..='\u{1e9f}', + '\u{1ea1}'..='\u{1ea1}', '\u{1ea3}'..='\u{1ea3}', '\u{1ea5}'..='\u{1ea5}', + '\u{1ea7}'..='\u{1ea7}', '\u{1ea9}'..='\u{1ea9}', '\u{1eab}'..='\u{1eab}', + '\u{1ead}'..='\u{1ead}', '\u{1eaf}'..='\u{1eaf}', '\u{1eb1}'..='\u{1eb1}', + '\u{1eb3}'..='\u{1eb3}', '\u{1eb5}'..='\u{1eb5}', '\u{1eb7}'..='\u{1eb7}', + '\u{1eb9}'..='\u{1eb9}', '\u{1ebb}'..='\u{1ebb}', '\u{1ebd}'..='\u{1ebd}', + '\u{1ebf}'..='\u{1ebf}', '\u{1ec1}'..='\u{1ec1}', '\u{1ec3}'..='\u{1ec3}', + '\u{1ec5}'..='\u{1ec5}', '\u{1ec7}'..='\u{1ec7}', '\u{1ec9}'..='\u{1ec9}', + '\u{1ecb}'..='\u{1ecb}', '\u{1ecd}'..='\u{1ecd}', '\u{1ecf}'..='\u{1ecf}', + '\u{1ed1}'..='\u{1ed1}', '\u{1ed3}'..='\u{1ed3}', '\u{1ed5}'..='\u{1ed5}', + '\u{1ed7}'..='\u{1ed7}', '\u{1ed9}'..='\u{1ed9}', '\u{1edb}'..='\u{1edb}', + '\u{1edd}'..='\u{1edd}', '\u{1edf}'..='\u{1edf}', '\u{1ee1}'..='\u{1ee1}', + '\u{1ee3}'..='\u{1ee3}', '\u{1ee5}'..='\u{1ee5}', '\u{1ee7}'..='\u{1ee7}', + '\u{1ee9}'..='\u{1ee9}', '\u{1eeb}'..='\u{1eeb}', '\u{1eed}'..='\u{1eed}', + '\u{1eef}'..='\u{1eef}', '\u{1ef1}'..='\u{1ef1}', '\u{1ef3}'..='\u{1ef3}', + '\u{1ef5}'..='\u{1ef5}', '\u{1ef7}'..='\u{1ef7}', '\u{1ef9}'..='\u{1ef9}', + '\u{1efb}'..='\u{1efb}', '\u{1efd}'..='\u{1efd}', '\u{1eff}'..='\u{1f07}', + '\u{1f10}'..='\u{1f15}', '\u{1f20}'..='\u{1f27}', '\u{1f30}'..='\u{1f37}', + '\u{1f40}'..='\u{1f45}', '\u{1f50}'..='\u{1f57}', '\u{1f60}'..='\u{1f67}', + '\u{1f70}'..='\u{1f7d}', '\u{1f80}'..='\u{1f87}', '\u{1f90}'..='\u{1f97}', + '\u{1fa0}'..='\u{1fa7}', '\u{1fb0}'..='\u{1fb4}', '\u{1fb6}'..='\u{1fb7}', + '\u{1fbe}'..='\u{1fbe}', '\u{1fc2}'..='\u{1fc4}', '\u{1fc6}'..='\u{1fc7}', + '\u{1fd0}'..='\u{1fd3}', '\u{1fd6}'..='\u{1fd7}', '\u{1fe0}'..='\u{1fe7}', + '\u{1ff2}'..='\u{1ff4}', '\u{1ff6}'..='\u{1ff7}', '\u{2071}'..='\u{2071}', + '\u{207f}'..='\u{207f}', '\u{2090}'..='\u{209c}', '\u{210a}'..='\u{210a}', + '\u{210e}'..='\u{210f}', '\u{2113}'..='\u{2113}', '\u{212f}'..='\u{212f}', + '\u{2134}'..='\u{2134}', '\u{2139}'..='\u{2139}', '\u{213c}'..='\u{213d}', + '\u{2146}'..='\u{2149}', '\u{214e}'..='\u{214e}', '\u{2170}'..='\u{217f}', + 
'\u{2184}'..='\u{2184}', '\u{24d0}'..='\u{24e9}', '\u{2c30}'..='\u{2c5f}', + '\u{2c61}'..='\u{2c61}', '\u{2c65}'..='\u{2c66}', '\u{2c68}'..='\u{2c68}', + '\u{2c6a}'..='\u{2c6a}', '\u{2c6c}'..='\u{2c6c}', '\u{2c71}'..='\u{2c71}', + '\u{2c73}'..='\u{2c74}', '\u{2c76}'..='\u{2c7d}', '\u{2c81}'..='\u{2c81}', + '\u{2c83}'..='\u{2c83}', '\u{2c85}'..='\u{2c85}', '\u{2c87}'..='\u{2c87}', + '\u{2c89}'..='\u{2c89}', '\u{2c8b}'..='\u{2c8b}', '\u{2c8d}'..='\u{2c8d}', + '\u{2c8f}'..='\u{2c8f}', '\u{2c91}'..='\u{2c91}', '\u{2c93}'..='\u{2c93}', + '\u{2c95}'..='\u{2c95}', '\u{2c97}'..='\u{2c97}', '\u{2c99}'..='\u{2c99}', + '\u{2c9b}'..='\u{2c9b}', '\u{2c9d}'..='\u{2c9d}', '\u{2c9f}'..='\u{2c9f}', + '\u{2ca1}'..='\u{2ca1}', '\u{2ca3}'..='\u{2ca3}', '\u{2ca5}'..='\u{2ca5}', + '\u{2ca7}'..='\u{2ca7}', '\u{2ca9}'..='\u{2ca9}', '\u{2cab}'..='\u{2cab}', + '\u{2cad}'..='\u{2cad}', '\u{2caf}'..='\u{2caf}', '\u{2cb1}'..='\u{2cb1}', + '\u{2cb3}'..='\u{2cb3}', '\u{2cb5}'..='\u{2cb5}', '\u{2cb7}'..='\u{2cb7}', + '\u{2cb9}'..='\u{2cb9}', '\u{2cbb}'..='\u{2cbb}', '\u{2cbd}'..='\u{2cbd}', + '\u{2cbf}'..='\u{2cbf}', '\u{2cc1}'..='\u{2cc1}', '\u{2cc3}'..='\u{2cc3}', + '\u{2cc5}'..='\u{2cc5}', '\u{2cc7}'..='\u{2cc7}', '\u{2cc9}'..='\u{2cc9}', + '\u{2ccb}'..='\u{2ccb}', '\u{2ccd}'..='\u{2ccd}', '\u{2ccf}'..='\u{2ccf}', + '\u{2cd1}'..='\u{2cd1}', '\u{2cd3}'..='\u{2cd3}', '\u{2cd5}'..='\u{2cd5}', + '\u{2cd7}'..='\u{2cd7}', '\u{2cd9}'..='\u{2cd9}', '\u{2cdb}'..='\u{2cdb}', + '\u{2cdd}'..='\u{2cdd}', '\u{2cdf}'..='\u{2cdf}', '\u{2ce1}'..='\u{2ce1}', + '\u{2ce3}'..='\u{2ce4}', '\u{2cec}'..='\u{2cec}', '\u{2cee}'..='\u{2cee}', + '\u{2cf3}'..='\u{2cf3}', '\u{2d00}'..='\u{2d25}', '\u{2d27}'..='\u{2d27}', + '\u{2d2d}'..='\u{2d2d}', '\u{a641}'..='\u{a641}', '\u{a643}'..='\u{a643}', + '\u{a645}'..='\u{a645}', '\u{a647}'..='\u{a647}', '\u{a649}'..='\u{a649}', + '\u{a64b}'..='\u{a64b}', '\u{a64d}'..='\u{a64d}', '\u{a64f}'..='\u{a64f}', + '\u{a651}'..='\u{a651}', '\u{a653}'..='\u{a653}', '\u{a655}'..='\u{a655}', + '\u{a657}'..='\u{a657}', '\u{a659}'..='\u{a659}', '\u{a65b}'..='\u{a65b}', + '\u{a65d}'..='\u{a65d}', '\u{a65f}'..='\u{a65f}', '\u{a661}'..='\u{a661}', + '\u{a663}'..='\u{a663}', '\u{a665}'..='\u{a665}', '\u{a667}'..='\u{a667}', + '\u{a669}'..='\u{a669}', '\u{a66b}'..='\u{a66b}', '\u{a66d}'..='\u{a66d}', + '\u{a681}'..='\u{a681}', '\u{a683}'..='\u{a683}', '\u{a685}'..='\u{a685}', + '\u{a687}'..='\u{a687}', '\u{a689}'..='\u{a689}', '\u{a68b}'..='\u{a68b}', + '\u{a68d}'..='\u{a68d}', '\u{a68f}'..='\u{a68f}', '\u{a691}'..='\u{a691}', + '\u{a693}'..='\u{a693}', '\u{a695}'..='\u{a695}', '\u{a697}'..='\u{a697}', + '\u{a699}'..='\u{a699}', '\u{a69b}'..='\u{a69d}', '\u{a723}'..='\u{a723}', + '\u{a725}'..='\u{a725}', '\u{a727}'..='\u{a727}', '\u{a729}'..='\u{a729}', + '\u{a72b}'..='\u{a72b}', '\u{a72d}'..='\u{a72d}', '\u{a72f}'..='\u{a731}', + '\u{a733}'..='\u{a733}', '\u{a735}'..='\u{a735}', '\u{a737}'..='\u{a737}', + '\u{a739}'..='\u{a739}', '\u{a73b}'..='\u{a73b}', '\u{a73d}'..='\u{a73d}', + '\u{a73f}'..='\u{a73f}', '\u{a741}'..='\u{a741}', '\u{a743}'..='\u{a743}', + '\u{a745}'..='\u{a745}', '\u{a747}'..='\u{a747}', '\u{a749}'..='\u{a749}', + '\u{a74b}'..='\u{a74b}', '\u{a74d}'..='\u{a74d}', '\u{a74f}'..='\u{a74f}', + '\u{a751}'..='\u{a751}', '\u{a753}'..='\u{a753}', '\u{a755}'..='\u{a755}', + '\u{a757}'..='\u{a757}', '\u{a759}'..='\u{a759}', '\u{a75b}'..='\u{a75b}', + '\u{a75d}'..='\u{a75d}', '\u{a75f}'..='\u{a75f}', '\u{a761}'..='\u{a761}', + '\u{a763}'..='\u{a763}', '\u{a765}'..='\u{a765}', '\u{a767}'..='\u{a767}', + 
'\u{a769}'..='\u{a769}', '\u{a76b}'..='\u{a76b}', '\u{a76d}'..='\u{a76d}',
+ '\u{a76f}'..='\u{a778}', '\u{a77a}'..='\u{a77a}', '\u{a77c}'..='\u{a77c}',
+ '\u{a77f}'..='\u{a77f}', '\u{a781}'..='\u{a781}', '\u{a783}'..='\u{a783}',
+ '\u{a785}'..='\u{a785}', '\u{a787}'..='\u{a787}', '\u{a78c}'..='\u{a78c}',
+ '\u{a78e}'..='\u{a78e}', '\u{a791}'..='\u{a791}', '\u{a793}'..='\u{a795}',
+ '\u{a797}'..='\u{a797}', '\u{a799}'..='\u{a799}', '\u{a79b}'..='\u{a79b}',
+ '\u{a79d}'..='\u{a79d}', '\u{a79f}'..='\u{a79f}', '\u{a7a1}'..='\u{a7a1}',
+ '\u{a7a3}'..='\u{a7a3}', '\u{a7a5}'..='\u{a7a5}', '\u{a7a7}'..='\u{a7a7}',
+ '\u{a7a9}'..='\u{a7a9}', '\u{a7af}'..='\u{a7af}', '\u{a7b5}'..='\u{a7b5}',
+ '\u{a7b7}'..='\u{a7b7}', '\u{a7b9}'..='\u{a7b9}', '\u{a7bb}'..='\u{a7bb}',
+ '\u{a7bd}'..='\u{a7bd}', '\u{a7bf}'..='\u{a7bf}', '\u{a7c1}'..='\u{a7c1}',
+ '\u{a7c3}'..='\u{a7c3}', '\u{a7c8}'..='\u{a7c8}', '\u{a7ca}'..='\u{a7ca}',
+ '\u{a7cd}'..='\u{a7cd}', '\u{a7cf}'..='\u{a7cf}', '\u{a7d1}'..='\u{a7d1}',
+ '\u{a7d3}'..='\u{a7d3}', '\u{a7d5}'..='\u{a7d5}', '\u{a7d7}'..='\u{a7d7}',
+ '\u{a7d9}'..='\u{a7d9}', '\u{a7db}'..='\u{a7db}', '\u{a7f1}'..='\u{a7f4}',
+ '\u{a7f6}'..='\u{a7f6}', '\u{a7f8}'..='\u{a7fa}', '\u{ab30}'..='\u{ab5a}',
+ '\u{ab5c}'..='\u{ab69}', '\u{ab70}'..='\u{abbf}', '\u{fb00}'..='\u{fb06}',
+ '\u{fb13}'..='\u{fb17}', '\u{ff41}'..='\u{ff5a}', '\u{10428}'..='\u{1044f}',
+ '\u{104d8}'..='\u{104fb}', '\u{10597}'..='\u{105a1}', '\u{105a3}'..='\u{105b1}',
+ '\u{105b3}'..='\u{105b9}', '\u{105bb}'..='\u{105bc}', '\u{10780}'..='\u{10780}',
+ '\u{10783}'..='\u{10785}', '\u{10787}'..='\u{107b0}', '\u{107b2}'..='\u{107ba}',
+ '\u{10cc0}'..='\u{10cf2}', '\u{10d70}'..='\u{10d85}', '\u{118c0}'..='\u{118df}',
+ '\u{16e60}'..='\u{16e7f}', '\u{16ebb}'..='\u{16ed3}', '\u{1d41a}'..='\u{1d433}',
+ '\u{1d44e}'..='\u{1d454}', '\u{1d456}'..='\u{1d467}', '\u{1d482}'..='\u{1d49b}',
+ '\u{1d4b6}'..='\u{1d4b9}', '\u{1d4bb}'..='\u{1d4bb}', '\u{1d4bd}'..='\u{1d4c3}',
+ '\u{1d4c5}'..='\u{1d4cf}', '\u{1d4ea}'..='\u{1d503}', '\u{1d51e}'..='\u{1d537}',
+ '\u{1d552}'..='\u{1d56b}', '\u{1d586}'..='\u{1d59f}', '\u{1d5ba}'..='\u{1d5d3}',
+ '\u{1d5ee}'..='\u{1d607}', '\u{1d622}'..='\u{1d63b}', '\u{1d656}'..='\u{1d66f}',
+ '\u{1d68a}'..='\u{1d6a5}', '\u{1d6c2}'..='\u{1d6da}', '\u{1d6dc}'..='\u{1d6e1}',
+ '\u{1d6fc}'..='\u{1d714}', '\u{1d716}'..='\u{1d71b}', '\u{1d736}'..='\u{1d74e}',
+ '\u{1d750}'..='\u{1d755}', '\u{1d770}'..='\u{1d788}', '\u{1d78a}'..='\u{1d78f}',
+ '\u{1d7aa}'..='\u{1d7c2}', '\u{1d7c4}'..='\u{1d7c9}', '\u{1d7cb}'..='\u{1d7cb}',
+ '\u{1df00}'..='\u{1df09}', '\u{1df0b}'..='\u{1df1e}', '\u{1df25}'..='\u{1df2a}',
+ '\u{1e030}'..='\u{1e06d}', '\u{1e922}'..='\u{1e943}',
+];
+
+#[rustfmt::skip]
+pub(super) static N: &[RangeInclusive<char>; 145] = &[
+ '\u{b2}'..='\u{b3}', '\u{b9}'..='\u{b9}', '\u{bc}'..='\u{be}', '\u{660}'..='\u{669}',
+ '\u{6f0}'..='\u{6f9}', '\u{7c0}'..='\u{7c9}', '\u{966}'..='\u{96f}', '\u{9e6}'..='\u{9ef}',
+ '\u{9f4}'..='\u{9f9}', '\u{a66}'..='\u{a6f}', '\u{ae6}'..='\u{aef}', '\u{b66}'..='\u{b6f}',
+ '\u{b72}'..='\u{b77}', '\u{be6}'..='\u{bf2}', '\u{c66}'..='\u{c6f}', '\u{c78}'..='\u{c7e}',
+ '\u{ce6}'..='\u{cef}', '\u{d58}'..='\u{d5e}', '\u{d66}'..='\u{d78}', '\u{de6}'..='\u{def}',
+ '\u{e50}'..='\u{e59}', '\u{ed0}'..='\u{ed9}', '\u{f20}'..='\u{f33}',
+ '\u{1040}'..='\u{1049}', '\u{1090}'..='\u{1099}', '\u{1369}'..='\u{137c}',
+ '\u{16ee}'..='\u{16f0}', '\u{17e0}'..='\u{17e9}', '\u{17f0}'..='\u{17f9}',
+ '\u{1810}'..='\u{1819}', '\u{1946}'..='\u{194f}', '\u{19d0}'..='\u{19da}',
+ '\u{1a80}'..='\u{1a89}',
'\u{1a90}'..='\u{1a99}', '\u{1b50}'..='\u{1b59}',
+ '\u{1bb0}'..='\u{1bb9}', '\u{1c40}'..='\u{1c49}', '\u{1c50}'..='\u{1c59}',
+ '\u{2070}'..='\u{2070}', '\u{2074}'..='\u{2079}', '\u{2080}'..='\u{2089}',
+ '\u{2150}'..='\u{2182}', '\u{2185}'..='\u{2189}', '\u{2460}'..='\u{249b}',
+ '\u{24ea}'..='\u{24ff}', '\u{2776}'..='\u{2793}', '\u{2cfd}'..='\u{2cfd}',
+ '\u{3007}'..='\u{3007}', '\u{3021}'..='\u{3029}', '\u{3038}'..='\u{303a}',
+ '\u{3192}'..='\u{3195}', '\u{3220}'..='\u{3229}', '\u{3248}'..='\u{324f}',
+ '\u{3251}'..='\u{325f}', '\u{3280}'..='\u{3289}', '\u{32b1}'..='\u{32bf}',
+ '\u{a620}'..='\u{a629}', '\u{a6e6}'..='\u{a6ef}', '\u{a830}'..='\u{a835}',
+ '\u{a8d0}'..='\u{a8d9}', '\u{a900}'..='\u{a909}', '\u{a9d0}'..='\u{a9d9}',
+ '\u{a9f0}'..='\u{a9f9}', '\u{aa50}'..='\u{aa59}', '\u{abf0}'..='\u{abf9}',
+ '\u{ff10}'..='\u{ff19}', '\u{10107}'..='\u{10133}', '\u{10140}'..='\u{10178}',
+ '\u{1018a}'..='\u{1018b}', '\u{102e1}'..='\u{102fb}', '\u{10320}'..='\u{10323}',
+ '\u{10341}'..='\u{10341}', '\u{1034a}'..='\u{1034a}', '\u{103d1}'..='\u{103d5}',
+ '\u{104a0}'..='\u{104a9}', '\u{10858}'..='\u{1085f}', '\u{10879}'..='\u{1087f}',
+ '\u{108a7}'..='\u{108af}', '\u{108fb}'..='\u{108ff}', '\u{10916}'..='\u{1091b}',
+ '\u{109bc}'..='\u{109bd}', '\u{109c0}'..='\u{109cf}', '\u{109d2}'..='\u{109ff}',
+ '\u{10a40}'..='\u{10a48}', '\u{10a7d}'..='\u{10a7e}', '\u{10a9d}'..='\u{10a9f}',
+ '\u{10aeb}'..='\u{10aef}', '\u{10b58}'..='\u{10b5f}', '\u{10b78}'..='\u{10b7f}',
+ '\u{10ba9}'..='\u{10baf}', '\u{10cfa}'..='\u{10cff}', '\u{10d30}'..='\u{10d39}',
+ '\u{10d40}'..='\u{10d49}', '\u{10e60}'..='\u{10e7e}', '\u{10f1d}'..='\u{10f26}',
+ '\u{10f51}'..='\u{10f54}', '\u{10fc5}'..='\u{10fcb}', '\u{11052}'..='\u{1106f}',
+ '\u{110f0}'..='\u{110f9}', '\u{11136}'..='\u{1113f}', '\u{111d0}'..='\u{111d9}',
+ '\u{111e1}'..='\u{111f4}', '\u{112f0}'..='\u{112f9}', '\u{11450}'..='\u{11459}',
+ '\u{114d0}'..='\u{114d9}', '\u{11650}'..='\u{11659}', '\u{116c0}'..='\u{116c9}',
+ '\u{116d0}'..='\u{116e3}', '\u{11730}'..='\u{1173b}', '\u{118e0}'..='\u{118f2}',
+ '\u{11950}'..='\u{11959}', '\u{11bf0}'..='\u{11bf9}', '\u{11c50}'..='\u{11c6c}',
+ '\u{11d50}'..='\u{11d59}', '\u{11da0}'..='\u{11da9}', '\u{11de0}'..='\u{11de9}',
+ '\u{11f50}'..='\u{11f59}', '\u{11fc0}'..='\u{11fd4}', '\u{12400}'..='\u{1246e}',
+ '\u{16130}'..='\u{16139}', '\u{16a60}'..='\u{16a69}', '\u{16ac0}'..='\u{16ac9}',
+ '\u{16b50}'..='\u{16b59}', '\u{16b5b}'..='\u{16b61}', '\u{16d70}'..='\u{16d79}',
+ '\u{16e80}'..='\u{16e96}', '\u{16ff4}'..='\u{16ff6}', '\u{1ccf0}'..='\u{1ccf9}',
+ '\u{1d2c0}'..='\u{1d2d3}', '\u{1d2e0}'..='\u{1d2f3}', '\u{1d360}'..='\u{1d378}',
+ '\u{1d7ce}'..='\u{1d7ff}', '\u{1e140}'..='\u{1e149}', '\u{1e2f0}'..='\u{1e2f9}',
+ '\u{1e4f0}'..='\u{1e4f9}', '\u{1e5f1}'..='\u{1e5fa}', '\u{1e8c7}'..='\u{1e8cf}',
+ '\u{1e950}'..='\u{1e959}', '\u{1ec71}'..='\u{1ecab}', '\u{1ecad}'..='\u{1ecaf}',
+ '\u{1ecb1}'..='\u{1ecb4}', '\u{1ed01}'..='\u{1ed2d}', '\u{1ed2f}'..='\u{1ed3d}',
+ '\u{1f100}'..='\u{1f10c}', '\u{1fbf0}'..='\u{1fbf9}',
+];
+
+#[rustfmt::skip]
+pub(super) static UPPERCASE: &[RangeInclusive<char>; 659] = &[
+ '\u{c0}'..='\u{d6}', '\u{d8}'..='\u{de}', '\u{100}'..='\u{100}', '\u{102}'..='\u{102}',
+ '\u{104}'..='\u{104}', '\u{106}'..='\u{106}', '\u{108}'..='\u{108}', '\u{10a}'..='\u{10a}',
+ '\u{10c}'..='\u{10c}', '\u{10e}'..='\u{10e}', '\u{110}'..='\u{110}', '\u{112}'..='\u{112}',
+ '\u{114}'..='\u{114}', '\u{116}'..='\u{116}', '\u{118}'..='\u{118}', '\u{11a}'..='\u{11a}',
+ '\u{11c}'..='\u{11c}', '\u{11e}'..='\u{11e}',
'\u{120}'..='\u{120}', '\u{122}'..='\u{122}', + '\u{124}'..='\u{124}', '\u{126}'..='\u{126}', '\u{128}'..='\u{128}', '\u{12a}'..='\u{12a}', + '\u{12c}'..='\u{12c}', '\u{12e}'..='\u{12e}', '\u{130}'..='\u{130}', '\u{132}'..='\u{132}', + '\u{134}'..='\u{134}', '\u{136}'..='\u{136}', '\u{139}'..='\u{139}', '\u{13b}'..='\u{13b}', + '\u{13d}'..='\u{13d}', '\u{13f}'..='\u{13f}', '\u{141}'..='\u{141}', '\u{143}'..='\u{143}', + '\u{145}'..='\u{145}', '\u{147}'..='\u{147}', '\u{14a}'..='\u{14a}', '\u{14c}'..='\u{14c}', + '\u{14e}'..='\u{14e}', '\u{150}'..='\u{150}', '\u{152}'..='\u{152}', '\u{154}'..='\u{154}', + '\u{156}'..='\u{156}', '\u{158}'..='\u{158}', '\u{15a}'..='\u{15a}', '\u{15c}'..='\u{15c}', + '\u{15e}'..='\u{15e}', '\u{160}'..='\u{160}', '\u{162}'..='\u{162}', '\u{164}'..='\u{164}', + '\u{166}'..='\u{166}', '\u{168}'..='\u{168}', '\u{16a}'..='\u{16a}', '\u{16c}'..='\u{16c}', + '\u{16e}'..='\u{16e}', '\u{170}'..='\u{170}', '\u{172}'..='\u{172}', '\u{174}'..='\u{174}', + '\u{176}'..='\u{176}', '\u{178}'..='\u{179}', '\u{17b}'..='\u{17b}', '\u{17d}'..='\u{17d}', + '\u{181}'..='\u{182}', '\u{184}'..='\u{184}', '\u{186}'..='\u{187}', '\u{189}'..='\u{18b}', + '\u{18e}'..='\u{191}', '\u{193}'..='\u{194}', '\u{196}'..='\u{198}', '\u{19c}'..='\u{19d}', + '\u{19f}'..='\u{1a0}', '\u{1a2}'..='\u{1a2}', '\u{1a4}'..='\u{1a4}', '\u{1a6}'..='\u{1a7}', + '\u{1a9}'..='\u{1a9}', '\u{1ac}'..='\u{1ac}', '\u{1ae}'..='\u{1af}', '\u{1b1}'..='\u{1b3}', + '\u{1b5}'..='\u{1b5}', '\u{1b7}'..='\u{1b8}', '\u{1bc}'..='\u{1bc}', '\u{1c4}'..='\u{1c4}', + '\u{1c7}'..='\u{1c7}', '\u{1ca}'..='\u{1ca}', '\u{1cd}'..='\u{1cd}', '\u{1cf}'..='\u{1cf}', + '\u{1d1}'..='\u{1d1}', '\u{1d3}'..='\u{1d3}', '\u{1d5}'..='\u{1d5}', '\u{1d7}'..='\u{1d7}', + '\u{1d9}'..='\u{1d9}', '\u{1db}'..='\u{1db}', '\u{1de}'..='\u{1de}', '\u{1e0}'..='\u{1e0}', + '\u{1e2}'..='\u{1e2}', '\u{1e4}'..='\u{1e4}', '\u{1e6}'..='\u{1e6}', '\u{1e8}'..='\u{1e8}', + '\u{1ea}'..='\u{1ea}', '\u{1ec}'..='\u{1ec}', '\u{1ee}'..='\u{1ee}', '\u{1f1}'..='\u{1f1}', + '\u{1f4}'..='\u{1f4}', '\u{1f6}'..='\u{1f8}', '\u{1fa}'..='\u{1fa}', '\u{1fc}'..='\u{1fc}', + '\u{1fe}'..='\u{1fe}', '\u{200}'..='\u{200}', '\u{202}'..='\u{202}', '\u{204}'..='\u{204}', + '\u{206}'..='\u{206}', '\u{208}'..='\u{208}', '\u{20a}'..='\u{20a}', '\u{20c}'..='\u{20c}', + '\u{20e}'..='\u{20e}', '\u{210}'..='\u{210}', '\u{212}'..='\u{212}', '\u{214}'..='\u{214}', + '\u{216}'..='\u{216}', '\u{218}'..='\u{218}', '\u{21a}'..='\u{21a}', '\u{21c}'..='\u{21c}', + '\u{21e}'..='\u{21e}', '\u{220}'..='\u{220}', '\u{222}'..='\u{222}', '\u{224}'..='\u{224}', + '\u{226}'..='\u{226}', '\u{228}'..='\u{228}', '\u{22a}'..='\u{22a}', '\u{22c}'..='\u{22c}', + '\u{22e}'..='\u{22e}', '\u{230}'..='\u{230}', '\u{232}'..='\u{232}', '\u{23a}'..='\u{23b}', + '\u{23d}'..='\u{23e}', '\u{241}'..='\u{241}', '\u{243}'..='\u{246}', '\u{248}'..='\u{248}', + '\u{24a}'..='\u{24a}', '\u{24c}'..='\u{24c}', '\u{24e}'..='\u{24e}', '\u{370}'..='\u{370}', + '\u{372}'..='\u{372}', '\u{376}'..='\u{376}', '\u{37f}'..='\u{37f}', '\u{386}'..='\u{386}', + '\u{388}'..='\u{38a}', '\u{38c}'..='\u{38c}', '\u{38e}'..='\u{38f}', '\u{391}'..='\u{3a1}', + '\u{3a3}'..='\u{3ab}', '\u{3cf}'..='\u{3cf}', '\u{3d2}'..='\u{3d4}', '\u{3d8}'..='\u{3d8}', + '\u{3da}'..='\u{3da}', '\u{3dc}'..='\u{3dc}', '\u{3de}'..='\u{3de}', '\u{3e0}'..='\u{3e0}', + '\u{3e2}'..='\u{3e2}', '\u{3e4}'..='\u{3e4}', '\u{3e6}'..='\u{3e6}', '\u{3e8}'..='\u{3e8}', + '\u{3ea}'..='\u{3ea}', '\u{3ec}'..='\u{3ec}', '\u{3ee}'..='\u{3ee}', '\u{3f4}'..='\u{3f4}', + '\u{3f7}'..='\u{3f7}', 
'\u{3f9}'..='\u{3fa}', '\u{3fd}'..='\u{42f}', '\u{460}'..='\u{460}', + '\u{462}'..='\u{462}', '\u{464}'..='\u{464}', '\u{466}'..='\u{466}', '\u{468}'..='\u{468}', + '\u{46a}'..='\u{46a}', '\u{46c}'..='\u{46c}', '\u{46e}'..='\u{46e}', '\u{470}'..='\u{470}', + '\u{472}'..='\u{472}', '\u{474}'..='\u{474}', '\u{476}'..='\u{476}', '\u{478}'..='\u{478}', + '\u{47a}'..='\u{47a}', '\u{47c}'..='\u{47c}', '\u{47e}'..='\u{47e}', '\u{480}'..='\u{480}', + '\u{48a}'..='\u{48a}', '\u{48c}'..='\u{48c}', '\u{48e}'..='\u{48e}', '\u{490}'..='\u{490}', + '\u{492}'..='\u{492}', '\u{494}'..='\u{494}', '\u{496}'..='\u{496}', '\u{498}'..='\u{498}', + '\u{49a}'..='\u{49a}', '\u{49c}'..='\u{49c}', '\u{49e}'..='\u{49e}', '\u{4a0}'..='\u{4a0}', + '\u{4a2}'..='\u{4a2}', '\u{4a4}'..='\u{4a4}', '\u{4a6}'..='\u{4a6}', '\u{4a8}'..='\u{4a8}', + '\u{4aa}'..='\u{4aa}', '\u{4ac}'..='\u{4ac}', '\u{4ae}'..='\u{4ae}', '\u{4b0}'..='\u{4b0}', + '\u{4b2}'..='\u{4b2}', '\u{4b4}'..='\u{4b4}', '\u{4b6}'..='\u{4b6}', '\u{4b8}'..='\u{4b8}', + '\u{4ba}'..='\u{4ba}', '\u{4bc}'..='\u{4bc}', '\u{4be}'..='\u{4be}', '\u{4c0}'..='\u{4c1}', + '\u{4c3}'..='\u{4c3}', '\u{4c5}'..='\u{4c5}', '\u{4c7}'..='\u{4c7}', '\u{4c9}'..='\u{4c9}', + '\u{4cb}'..='\u{4cb}', '\u{4cd}'..='\u{4cd}', '\u{4d0}'..='\u{4d0}', '\u{4d2}'..='\u{4d2}', + '\u{4d4}'..='\u{4d4}', '\u{4d6}'..='\u{4d6}', '\u{4d8}'..='\u{4d8}', '\u{4da}'..='\u{4da}', + '\u{4dc}'..='\u{4dc}', '\u{4de}'..='\u{4de}', '\u{4e0}'..='\u{4e0}', '\u{4e2}'..='\u{4e2}', + '\u{4e4}'..='\u{4e4}', '\u{4e6}'..='\u{4e6}', '\u{4e8}'..='\u{4e8}', '\u{4ea}'..='\u{4ea}', + '\u{4ec}'..='\u{4ec}', '\u{4ee}'..='\u{4ee}', '\u{4f0}'..='\u{4f0}', '\u{4f2}'..='\u{4f2}', + '\u{4f4}'..='\u{4f4}', '\u{4f6}'..='\u{4f6}', '\u{4f8}'..='\u{4f8}', '\u{4fa}'..='\u{4fa}', + '\u{4fc}'..='\u{4fc}', '\u{4fe}'..='\u{4fe}', '\u{500}'..='\u{500}', '\u{502}'..='\u{502}', + '\u{504}'..='\u{504}', '\u{506}'..='\u{506}', '\u{508}'..='\u{508}', '\u{50a}'..='\u{50a}', + '\u{50c}'..='\u{50c}', '\u{50e}'..='\u{50e}', '\u{510}'..='\u{510}', '\u{512}'..='\u{512}', + '\u{514}'..='\u{514}', '\u{516}'..='\u{516}', '\u{518}'..='\u{518}', '\u{51a}'..='\u{51a}', + '\u{51c}'..='\u{51c}', '\u{51e}'..='\u{51e}', '\u{520}'..='\u{520}', '\u{522}'..='\u{522}', + '\u{524}'..='\u{524}', '\u{526}'..='\u{526}', '\u{528}'..='\u{528}', '\u{52a}'..='\u{52a}', + '\u{52c}'..='\u{52c}', '\u{52e}'..='\u{52e}', '\u{531}'..='\u{556}', + '\u{10a0}'..='\u{10c5}', '\u{10c7}'..='\u{10c7}', '\u{10cd}'..='\u{10cd}', + '\u{13a0}'..='\u{13f5}', '\u{1c89}'..='\u{1c89}', '\u{1c90}'..='\u{1cba}', + '\u{1cbd}'..='\u{1cbf}', '\u{1e00}'..='\u{1e00}', '\u{1e02}'..='\u{1e02}', + '\u{1e04}'..='\u{1e04}', '\u{1e06}'..='\u{1e06}', '\u{1e08}'..='\u{1e08}', + '\u{1e0a}'..='\u{1e0a}', '\u{1e0c}'..='\u{1e0c}', '\u{1e0e}'..='\u{1e0e}', + '\u{1e10}'..='\u{1e10}', '\u{1e12}'..='\u{1e12}', '\u{1e14}'..='\u{1e14}', + '\u{1e16}'..='\u{1e16}', '\u{1e18}'..='\u{1e18}', '\u{1e1a}'..='\u{1e1a}', + '\u{1e1c}'..='\u{1e1c}', '\u{1e1e}'..='\u{1e1e}', '\u{1e20}'..='\u{1e20}', + '\u{1e22}'..='\u{1e22}', '\u{1e24}'..='\u{1e24}', '\u{1e26}'..='\u{1e26}', + '\u{1e28}'..='\u{1e28}', '\u{1e2a}'..='\u{1e2a}', '\u{1e2c}'..='\u{1e2c}', + '\u{1e2e}'..='\u{1e2e}', '\u{1e30}'..='\u{1e30}', '\u{1e32}'..='\u{1e32}', + '\u{1e34}'..='\u{1e34}', '\u{1e36}'..='\u{1e36}', '\u{1e38}'..='\u{1e38}', + '\u{1e3a}'..='\u{1e3a}', '\u{1e3c}'..='\u{1e3c}', '\u{1e3e}'..='\u{1e3e}', + '\u{1e40}'..='\u{1e40}', '\u{1e42}'..='\u{1e42}', '\u{1e44}'..='\u{1e44}', + '\u{1e46}'..='\u{1e46}', '\u{1e48}'..='\u{1e48}', '\u{1e4a}'..='\u{1e4a}', + 
'\u{1e4c}'..='\u{1e4c}', '\u{1e4e}'..='\u{1e4e}', '\u{1e50}'..='\u{1e50}', + '\u{1e52}'..='\u{1e52}', '\u{1e54}'..='\u{1e54}', '\u{1e56}'..='\u{1e56}', + '\u{1e58}'..='\u{1e58}', '\u{1e5a}'..='\u{1e5a}', '\u{1e5c}'..='\u{1e5c}', + '\u{1e5e}'..='\u{1e5e}', '\u{1e60}'..='\u{1e60}', '\u{1e62}'..='\u{1e62}', + '\u{1e64}'..='\u{1e64}', '\u{1e66}'..='\u{1e66}', '\u{1e68}'..='\u{1e68}', + '\u{1e6a}'..='\u{1e6a}', '\u{1e6c}'..='\u{1e6c}', '\u{1e6e}'..='\u{1e6e}', + '\u{1e70}'..='\u{1e70}', '\u{1e72}'..='\u{1e72}', '\u{1e74}'..='\u{1e74}', + '\u{1e76}'..='\u{1e76}', '\u{1e78}'..='\u{1e78}', '\u{1e7a}'..='\u{1e7a}', + '\u{1e7c}'..='\u{1e7c}', '\u{1e7e}'..='\u{1e7e}', '\u{1e80}'..='\u{1e80}', + '\u{1e82}'..='\u{1e82}', '\u{1e84}'..='\u{1e84}', '\u{1e86}'..='\u{1e86}', + '\u{1e88}'..='\u{1e88}', '\u{1e8a}'..='\u{1e8a}', '\u{1e8c}'..='\u{1e8c}', + '\u{1e8e}'..='\u{1e8e}', '\u{1e90}'..='\u{1e90}', '\u{1e92}'..='\u{1e92}', + '\u{1e94}'..='\u{1e94}', '\u{1e9e}'..='\u{1e9e}', '\u{1ea0}'..='\u{1ea0}', + '\u{1ea2}'..='\u{1ea2}', '\u{1ea4}'..='\u{1ea4}', '\u{1ea6}'..='\u{1ea6}', + '\u{1ea8}'..='\u{1ea8}', '\u{1eaa}'..='\u{1eaa}', '\u{1eac}'..='\u{1eac}', + '\u{1eae}'..='\u{1eae}', '\u{1eb0}'..='\u{1eb0}', '\u{1eb2}'..='\u{1eb2}', + '\u{1eb4}'..='\u{1eb4}', '\u{1eb6}'..='\u{1eb6}', '\u{1eb8}'..='\u{1eb8}', + '\u{1eba}'..='\u{1eba}', '\u{1ebc}'..='\u{1ebc}', '\u{1ebe}'..='\u{1ebe}', + '\u{1ec0}'..='\u{1ec0}', '\u{1ec2}'..='\u{1ec2}', '\u{1ec4}'..='\u{1ec4}', + '\u{1ec6}'..='\u{1ec6}', '\u{1ec8}'..='\u{1ec8}', '\u{1eca}'..='\u{1eca}', + '\u{1ecc}'..='\u{1ecc}', '\u{1ece}'..='\u{1ece}', '\u{1ed0}'..='\u{1ed0}', + '\u{1ed2}'..='\u{1ed2}', '\u{1ed4}'..='\u{1ed4}', '\u{1ed6}'..='\u{1ed6}', + '\u{1ed8}'..='\u{1ed8}', '\u{1eda}'..='\u{1eda}', '\u{1edc}'..='\u{1edc}', + '\u{1ede}'..='\u{1ede}', '\u{1ee0}'..='\u{1ee0}', '\u{1ee2}'..='\u{1ee2}', + '\u{1ee4}'..='\u{1ee4}', '\u{1ee6}'..='\u{1ee6}', '\u{1ee8}'..='\u{1ee8}', + '\u{1eea}'..='\u{1eea}', '\u{1eec}'..='\u{1eec}', '\u{1eee}'..='\u{1eee}', + '\u{1ef0}'..='\u{1ef0}', '\u{1ef2}'..='\u{1ef2}', '\u{1ef4}'..='\u{1ef4}', + '\u{1ef6}'..='\u{1ef6}', '\u{1ef8}'..='\u{1ef8}', '\u{1efa}'..='\u{1efa}', + '\u{1efc}'..='\u{1efc}', '\u{1efe}'..='\u{1efe}', '\u{1f08}'..='\u{1f0f}', + '\u{1f18}'..='\u{1f1d}', '\u{1f28}'..='\u{1f2f}', '\u{1f38}'..='\u{1f3f}', + '\u{1f48}'..='\u{1f4d}', '\u{1f59}'..='\u{1f59}', '\u{1f5b}'..='\u{1f5b}', + '\u{1f5d}'..='\u{1f5d}', '\u{1f5f}'..='\u{1f5f}', '\u{1f68}'..='\u{1f6f}', + '\u{1fb8}'..='\u{1fbb}', '\u{1fc8}'..='\u{1fcb}', '\u{1fd8}'..='\u{1fdb}', + '\u{1fe8}'..='\u{1fec}', '\u{1ff8}'..='\u{1ffb}', '\u{2102}'..='\u{2102}', + '\u{2107}'..='\u{2107}', '\u{210b}'..='\u{210d}', '\u{2110}'..='\u{2112}', + '\u{2115}'..='\u{2115}', '\u{2119}'..='\u{211d}', '\u{2124}'..='\u{2124}', + '\u{2126}'..='\u{2126}', '\u{2128}'..='\u{2128}', '\u{212a}'..='\u{212d}', + '\u{2130}'..='\u{2133}', '\u{213e}'..='\u{213f}', '\u{2145}'..='\u{2145}', + '\u{2160}'..='\u{216f}', '\u{2183}'..='\u{2183}', '\u{24b6}'..='\u{24cf}', + '\u{2c00}'..='\u{2c2f}', '\u{2c60}'..='\u{2c60}', '\u{2c62}'..='\u{2c64}', + '\u{2c67}'..='\u{2c67}', '\u{2c69}'..='\u{2c69}', '\u{2c6b}'..='\u{2c6b}', + '\u{2c6d}'..='\u{2c70}', '\u{2c72}'..='\u{2c72}', '\u{2c75}'..='\u{2c75}', + '\u{2c7e}'..='\u{2c80}', '\u{2c82}'..='\u{2c82}', '\u{2c84}'..='\u{2c84}', + '\u{2c86}'..='\u{2c86}', '\u{2c88}'..='\u{2c88}', '\u{2c8a}'..='\u{2c8a}', + '\u{2c8c}'..='\u{2c8c}', '\u{2c8e}'..='\u{2c8e}', '\u{2c90}'..='\u{2c90}', + '\u{2c92}'..='\u{2c92}', '\u{2c94}'..='\u{2c94}', '\u{2c96}'..='\u{2c96}', + 
'\u{2c98}'..='\u{2c98}', '\u{2c9a}'..='\u{2c9a}', '\u{2c9c}'..='\u{2c9c}', + '\u{2c9e}'..='\u{2c9e}', '\u{2ca0}'..='\u{2ca0}', '\u{2ca2}'..='\u{2ca2}', + '\u{2ca4}'..='\u{2ca4}', '\u{2ca6}'..='\u{2ca6}', '\u{2ca8}'..='\u{2ca8}', + '\u{2caa}'..='\u{2caa}', '\u{2cac}'..='\u{2cac}', '\u{2cae}'..='\u{2cae}', + '\u{2cb0}'..='\u{2cb0}', '\u{2cb2}'..='\u{2cb2}', '\u{2cb4}'..='\u{2cb4}', + '\u{2cb6}'..='\u{2cb6}', '\u{2cb8}'..='\u{2cb8}', '\u{2cba}'..='\u{2cba}', + '\u{2cbc}'..='\u{2cbc}', '\u{2cbe}'..='\u{2cbe}', '\u{2cc0}'..='\u{2cc0}', + '\u{2cc2}'..='\u{2cc2}', '\u{2cc4}'..='\u{2cc4}', '\u{2cc6}'..='\u{2cc6}', + '\u{2cc8}'..='\u{2cc8}', '\u{2cca}'..='\u{2cca}', '\u{2ccc}'..='\u{2ccc}', + '\u{2cce}'..='\u{2cce}', '\u{2cd0}'..='\u{2cd0}', '\u{2cd2}'..='\u{2cd2}', + '\u{2cd4}'..='\u{2cd4}', '\u{2cd6}'..='\u{2cd6}', '\u{2cd8}'..='\u{2cd8}', + '\u{2cda}'..='\u{2cda}', '\u{2cdc}'..='\u{2cdc}', '\u{2cde}'..='\u{2cde}', + '\u{2ce0}'..='\u{2ce0}', '\u{2ce2}'..='\u{2ce2}', '\u{2ceb}'..='\u{2ceb}', + '\u{2ced}'..='\u{2ced}', '\u{2cf2}'..='\u{2cf2}', '\u{a640}'..='\u{a640}', + '\u{a642}'..='\u{a642}', '\u{a644}'..='\u{a644}', '\u{a646}'..='\u{a646}', + '\u{a648}'..='\u{a648}', '\u{a64a}'..='\u{a64a}', '\u{a64c}'..='\u{a64c}', + '\u{a64e}'..='\u{a64e}', '\u{a650}'..='\u{a650}', '\u{a652}'..='\u{a652}', + '\u{a654}'..='\u{a654}', '\u{a656}'..='\u{a656}', '\u{a658}'..='\u{a658}', + '\u{a65a}'..='\u{a65a}', '\u{a65c}'..='\u{a65c}', '\u{a65e}'..='\u{a65e}', + '\u{a660}'..='\u{a660}', '\u{a662}'..='\u{a662}', '\u{a664}'..='\u{a664}', + '\u{a666}'..='\u{a666}', '\u{a668}'..='\u{a668}', '\u{a66a}'..='\u{a66a}', + '\u{a66c}'..='\u{a66c}', '\u{a680}'..='\u{a680}', '\u{a682}'..='\u{a682}', + '\u{a684}'..='\u{a684}', '\u{a686}'..='\u{a686}', '\u{a688}'..='\u{a688}', + '\u{a68a}'..='\u{a68a}', '\u{a68c}'..='\u{a68c}', '\u{a68e}'..='\u{a68e}', + '\u{a690}'..='\u{a690}', '\u{a692}'..='\u{a692}', '\u{a694}'..='\u{a694}', + '\u{a696}'..='\u{a696}', '\u{a698}'..='\u{a698}', '\u{a69a}'..='\u{a69a}', + '\u{a722}'..='\u{a722}', '\u{a724}'..='\u{a724}', '\u{a726}'..='\u{a726}', + '\u{a728}'..='\u{a728}', '\u{a72a}'..='\u{a72a}', '\u{a72c}'..='\u{a72c}', + '\u{a72e}'..='\u{a72e}', '\u{a732}'..='\u{a732}', '\u{a734}'..='\u{a734}', + '\u{a736}'..='\u{a736}', '\u{a738}'..='\u{a738}', '\u{a73a}'..='\u{a73a}', + '\u{a73c}'..='\u{a73c}', '\u{a73e}'..='\u{a73e}', '\u{a740}'..='\u{a740}', + '\u{a742}'..='\u{a742}', '\u{a744}'..='\u{a744}', '\u{a746}'..='\u{a746}', + '\u{a748}'..='\u{a748}', '\u{a74a}'..='\u{a74a}', '\u{a74c}'..='\u{a74c}', + '\u{a74e}'..='\u{a74e}', '\u{a750}'..='\u{a750}', '\u{a752}'..='\u{a752}', + '\u{a754}'..='\u{a754}', '\u{a756}'..='\u{a756}', '\u{a758}'..='\u{a758}', + '\u{a75a}'..='\u{a75a}', '\u{a75c}'..='\u{a75c}', '\u{a75e}'..='\u{a75e}', + '\u{a760}'..='\u{a760}', '\u{a762}'..='\u{a762}', '\u{a764}'..='\u{a764}', + '\u{a766}'..='\u{a766}', '\u{a768}'..='\u{a768}', '\u{a76a}'..='\u{a76a}', + '\u{a76c}'..='\u{a76c}', '\u{a76e}'..='\u{a76e}', '\u{a779}'..='\u{a779}', + '\u{a77b}'..='\u{a77b}', '\u{a77d}'..='\u{a77e}', '\u{a780}'..='\u{a780}', + '\u{a782}'..='\u{a782}', '\u{a784}'..='\u{a784}', '\u{a786}'..='\u{a786}', + '\u{a78b}'..='\u{a78b}', '\u{a78d}'..='\u{a78d}', '\u{a790}'..='\u{a790}', + '\u{a792}'..='\u{a792}', '\u{a796}'..='\u{a796}', '\u{a798}'..='\u{a798}', + '\u{a79a}'..='\u{a79a}', '\u{a79c}'..='\u{a79c}', '\u{a79e}'..='\u{a79e}', + '\u{a7a0}'..='\u{a7a0}', '\u{a7a2}'..='\u{a7a2}', '\u{a7a4}'..='\u{a7a4}', + '\u{a7a6}'..='\u{a7a6}', '\u{a7a8}'..='\u{a7a8}', '\u{a7aa}'..='\u{a7ae}', + 
'\u{a7b0}'..='\u{a7b4}', '\u{a7b6}'..='\u{a7b6}', '\u{a7b8}'..='\u{a7b8}', + '\u{a7ba}'..='\u{a7ba}', '\u{a7bc}'..='\u{a7bc}', '\u{a7be}'..='\u{a7be}', + '\u{a7c0}'..='\u{a7c0}', '\u{a7c2}'..='\u{a7c2}', '\u{a7c4}'..='\u{a7c7}', + '\u{a7c9}'..='\u{a7c9}', '\u{a7cb}'..='\u{a7cc}', '\u{a7ce}'..='\u{a7ce}', + '\u{a7d0}'..='\u{a7d0}', '\u{a7d2}'..='\u{a7d2}', '\u{a7d4}'..='\u{a7d4}', + '\u{a7d6}'..='\u{a7d6}', '\u{a7d8}'..='\u{a7d8}', '\u{a7da}'..='\u{a7da}', + '\u{a7dc}'..='\u{a7dc}', '\u{a7f5}'..='\u{a7f5}', '\u{ff21}'..='\u{ff3a}', + '\u{10400}'..='\u{10427}', '\u{104b0}'..='\u{104d3}', '\u{10570}'..='\u{1057a}', + '\u{1057c}'..='\u{1058a}', '\u{1058c}'..='\u{10592}', '\u{10594}'..='\u{10595}', + '\u{10c80}'..='\u{10cb2}', '\u{10d50}'..='\u{10d65}', '\u{118a0}'..='\u{118bf}', + '\u{16e40}'..='\u{16e5f}', '\u{16ea0}'..='\u{16eb8}', '\u{1d400}'..='\u{1d419}', + '\u{1d434}'..='\u{1d44d}', '\u{1d468}'..='\u{1d481}', '\u{1d49c}'..='\u{1d49c}', + '\u{1d49e}'..='\u{1d49f}', '\u{1d4a2}'..='\u{1d4a2}', '\u{1d4a5}'..='\u{1d4a6}', + '\u{1d4a9}'..='\u{1d4ac}', '\u{1d4ae}'..='\u{1d4b5}', '\u{1d4d0}'..='\u{1d4e9}', + '\u{1d504}'..='\u{1d505}', '\u{1d507}'..='\u{1d50a}', '\u{1d50d}'..='\u{1d514}', + '\u{1d516}'..='\u{1d51c}', '\u{1d538}'..='\u{1d539}', '\u{1d53b}'..='\u{1d53e}', + '\u{1d540}'..='\u{1d544}', '\u{1d546}'..='\u{1d546}', '\u{1d54a}'..='\u{1d550}', + '\u{1d56c}'..='\u{1d585}', '\u{1d5a0}'..='\u{1d5b9}', '\u{1d5d4}'..='\u{1d5ed}', + '\u{1d608}'..='\u{1d621}', '\u{1d63c}'..='\u{1d655}', '\u{1d670}'..='\u{1d689}', + '\u{1d6a8}'..='\u{1d6c0}', '\u{1d6e2}'..='\u{1d6fa}', '\u{1d71c}'..='\u{1d734}', + '\u{1d756}'..='\u{1d76e}', '\u{1d790}'..='\u{1d7a8}', '\u{1d7ca}'..='\u{1d7ca}', + '\u{1e900}'..='\u{1e921}', '\u{1f130}'..='\u{1f149}', '\u{1f150}'..='\u{1f169}', + '\u{1f170}'..='\u{1f189}', +]; + +#[rustfmt::skip] +pub(super) static WHITE_SPACE: &[RangeInclusive<char>; 8] = &[ + '\u{85}'..='\u{85}', '\u{a0}'..='\u{a0}', '\u{1680}'..='\u{1680}', '\u{2000}'..='\u{200a}', + '\u{2028}'..='\u{2029}', '\u{202f}'..='\u{202f}', '\u{205f}'..='\u{205f}', + '\u{3000}'..='\u{3000}', +]; + +#[rustfmt::skip] +pub(super) static TO_LOWER: &[(char, [char; 3]); 1488] = &[ + ('\u{41}', ['\u{61}', '\u{0}', '\u{0}']), ('\u{42}', ['\u{62}', '\u{0}', '\u{0}']), + ('\u{43}', ['\u{63}', '\u{0}', '\u{0}']), ('\u{44}', ['\u{64}', '\u{0}', '\u{0}']), + ('\u{45}', ['\u{65}', '\u{0}', '\u{0}']), ('\u{46}', ['\u{66}', '\u{0}', '\u{0}']), + ('\u{47}', ['\u{67}', '\u{0}', '\u{0}']), ('\u{48}', ['\u{68}', '\u{0}', '\u{0}']), + ('\u{49}', ['\u{69}', '\u{0}', '\u{0}']), ('\u{4a}', ['\u{6a}', '\u{0}', '\u{0}']), + ('\u{4b}', ['\u{6b}', '\u{0}', '\u{0}']), ('\u{4c}', ['\u{6c}', '\u{0}', '\u{0}']), + ('\u{4d}', ['\u{6d}', '\u{0}', '\u{0}']), ('\u{4e}', ['\u{6e}', '\u{0}', '\u{0}']), + ('\u{4f}', ['\u{6f}', '\u{0}', '\u{0}']), ('\u{50}', ['\u{70}', '\u{0}', '\u{0}']), + ('\u{51}', ['\u{71}', '\u{0}', '\u{0}']), ('\u{52}', ['\u{72}', '\u{0}', '\u{0}']), + ('\u{53}', ['\u{73}', '\u{0}', '\u{0}']), ('\u{54}', ['\u{74}', '\u{0}', '\u{0}']), + ('\u{55}', ['\u{75}', '\u{0}', '\u{0}']), ('\u{56}', ['\u{76}', '\u{0}', '\u{0}']), + ('\u{57}', ['\u{77}', '\u{0}', '\u{0}']), ('\u{58}', ['\u{78}', '\u{0}', '\u{0}']), + ('\u{59}', ['\u{79}', '\u{0}', '\u{0}']), ('\u{5a}', ['\u{7a}', '\u{0}', '\u{0}']), + ('\u{c0}', ['\u{e0}', '\u{0}', '\u{0}']), ('\u{c1}', ['\u{e1}', '\u{0}', '\u{0}']), + ('\u{c2}', ['\u{e2}', '\u{0}', '\u{0}']), ('\u{c3}', ['\u{e3}', '\u{0}', '\u{0}']), + ('\u{c4}', ['\u{e4}', '\u{0}', '\u{0}']), ('\u{c5}', ['\u{e5}', '\u{0}', '\u{0}']), +
('\u{c6}', ['\u{e6}', '\u{0}', '\u{0}']), ('\u{c7}', ['\u{e7}', '\u{0}', '\u{0}']), + ('\u{c8}', ['\u{e8}', '\u{0}', '\u{0}']), ('\u{c9}', ['\u{e9}', '\u{0}', '\u{0}']), + ('\u{ca}', ['\u{ea}', '\u{0}', '\u{0}']), ('\u{cb}', ['\u{eb}', '\u{0}', '\u{0}']), + ('\u{cc}', ['\u{ec}', '\u{0}', '\u{0}']), ('\u{cd}', ['\u{ed}', '\u{0}', '\u{0}']), + ('\u{ce}', ['\u{ee}', '\u{0}', '\u{0}']), ('\u{cf}', ['\u{ef}', '\u{0}', '\u{0}']), + ('\u{d0}', ['\u{f0}', '\u{0}', '\u{0}']), ('\u{d1}', ['\u{f1}', '\u{0}', '\u{0}']), + ('\u{d2}', ['\u{f2}', '\u{0}', '\u{0}']), ('\u{d3}', ['\u{f3}', '\u{0}', '\u{0}']), + ('\u{d4}', ['\u{f4}', '\u{0}', '\u{0}']), ('\u{d5}', ['\u{f5}', '\u{0}', '\u{0}']), + ('\u{d6}', ['\u{f6}', '\u{0}', '\u{0}']), ('\u{d8}', ['\u{f8}', '\u{0}', '\u{0}']), + ('\u{d9}', ['\u{f9}', '\u{0}', '\u{0}']), ('\u{da}', ['\u{fa}', '\u{0}', '\u{0}']), + ('\u{db}', ['\u{fb}', '\u{0}', '\u{0}']), ('\u{dc}', ['\u{fc}', '\u{0}', '\u{0}']), + ('\u{dd}', ['\u{fd}', '\u{0}', '\u{0}']), ('\u{de}', ['\u{fe}', '\u{0}', '\u{0}']), + ('\u{100}', ['\u{101}', '\u{0}', '\u{0}']), ('\u{102}', ['\u{103}', '\u{0}', '\u{0}']), + ('\u{104}', ['\u{105}', '\u{0}', '\u{0}']), ('\u{106}', ['\u{107}', '\u{0}', '\u{0}']), + ('\u{108}', ['\u{109}', '\u{0}', '\u{0}']), ('\u{10a}', ['\u{10b}', '\u{0}', '\u{0}']), + ('\u{10c}', ['\u{10d}', '\u{0}', '\u{0}']), ('\u{10e}', ['\u{10f}', '\u{0}', '\u{0}']), + ('\u{110}', ['\u{111}', '\u{0}', '\u{0}']), ('\u{112}', ['\u{113}', '\u{0}', '\u{0}']), + ('\u{114}', ['\u{115}', '\u{0}', '\u{0}']), ('\u{116}', ['\u{117}', '\u{0}', '\u{0}']), + ('\u{118}', ['\u{119}', '\u{0}', '\u{0}']), ('\u{11a}', ['\u{11b}', '\u{0}', '\u{0}']), + ('\u{11c}', ['\u{11d}', '\u{0}', '\u{0}']), ('\u{11e}', ['\u{11f}', '\u{0}', '\u{0}']), + ('\u{120}', ['\u{121}', '\u{0}', '\u{0}']), ('\u{122}', ['\u{123}', '\u{0}', '\u{0}']), + ('\u{124}', ['\u{125}', '\u{0}', '\u{0}']), ('\u{126}', ['\u{127}', '\u{0}', '\u{0}']), + ('\u{128}', ['\u{129}', '\u{0}', '\u{0}']), ('\u{12a}', ['\u{12b}', '\u{0}', '\u{0}']), + ('\u{12c}', ['\u{12d}', '\u{0}', '\u{0}']), ('\u{12e}', ['\u{12f}', '\u{0}', '\u{0}']), + ('\u{130}', ['\u{69}', '\u{307}', '\u{0}']), ('\u{132}', ['\u{133}', '\u{0}', '\u{0}']), + ('\u{134}', ['\u{135}', '\u{0}', '\u{0}']), ('\u{136}', ['\u{137}', '\u{0}', '\u{0}']), + ('\u{139}', ['\u{13a}', '\u{0}', '\u{0}']), ('\u{13b}', ['\u{13c}', '\u{0}', '\u{0}']), + ('\u{13d}', ['\u{13e}', '\u{0}', '\u{0}']), ('\u{13f}', ['\u{140}', '\u{0}', '\u{0}']), + ('\u{141}', ['\u{142}', '\u{0}', '\u{0}']), ('\u{143}', ['\u{144}', '\u{0}', '\u{0}']), + ('\u{145}', ['\u{146}', '\u{0}', '\u{0}']), ('\u{147}', ['\u{148}', '\u{0}', '\u{0}']), + ('\u{14a}', ['\u{14b}', '\u{0}', '\u{0}']), ('\u{14c}', ['\u{14d}', '\u{0}', '\u{0}']), + ('\u{14e}', ['\u{14f}', '\u{0}', '\u{0}']), ('\u{150}', ['\u{151}', '\u{0}', '\u{0}']), + ('\u{152}', ['\u{153}', '\u{0}', '\u{0}']), ('\u{154}', ['\u{155}', '\u{0}', '\u{0}']), + ('\u{156}', ['\u{157}', '\u{0}', '\u{0}']), ('\u{158}', ['\u{159}', '\u{0}', '\u{0}']), + ('\u{15a}', ['\u{15b}', '\u{0}', '\u{0}']), ('\u{15c}', ['\u{15d}', '\u{0}', '\u{0}']), + ('\u{15e}', ['\u{15f}', '\u{0}', '\u{0}']), ('\u{160}', ['\u{161}', '\u{0}', '\u{0}']), + ('\u{162}', ['\u{163}', '\u{0}', '\u{0}']), ('\u{164}', ['\u{165}', '\u{0}', '\u{0}']), + ('\u{166}', ['\u{167}', '\u{0}', '\u{0}']), ('\u{168}', ['\u{169}', '\u{0}', '\u{0}']), + ('\u{16a}', ['\u{16b}', '\u{0}', '\u{0}']), ('\u{16c}', ['\u{16d}', '\u{0}', '\u{0}']), + ('\u{16e}', ['\u{16f}', '\u{0}', '\u{0}']), ('\u{170}', ['\u{171}', '\u{0}', '\u{0}']), + 
('\u{172}', ['\u{173}', '\u{0}', '\u{0}']), ('\u{174}', ['\u{175}', '\u{0}', '\u{0}']), + ('\u{176}', ['\u{177}', '\u{0}', '\u{0}']), ('\u{178}', ['\u{ff}', '\u{0}', '\u{0}']), + ('\u{179}', ['\u{17a}', '\u{0}', '\u{0}']), ('\u{17b}', ['\u{17c}', '\u{0}', '\u{0}']), + ('\u{17d}', ['\u{17e}', '\u{0}', '\u{0}']), ('\u{181}', ['\u{253}', '\u{0}', '\u{0}']), + ('\u{182}', ['\u{183}', '\u{0}', '\u{0}']), ('\u{184}', ['\u{185}', '\u{0}', '\u{0}']), + ('\u{186}', ['\u{254}', '\u{0}', '\u{0}']), ('\u{187}', ['\u{188}', '\u{0}', '\u{0}']), + ('\u{189}', ['\u{256}', '\u{0}', '\u{0}']), ('\u{18a}', ['\u{257}', '\u{0}', '\u{0}']), + ('\u{18b}', ['\u{18c}', '\u{0}', '\u{0}']), ('\u{18e}', ['\u{1dd}', '\u{0}', '\u{0}']), + ('\u{18f}', ['\u{259}', '\u{0}', '\u{0}']), ('\u{190}', ['\u{25b}', '\u{0}', '\u{0}']), + ('\u{191}', ['\u{192}', '\u{0}', '\u{0}']), ('\u{193}', ['\u{260}', '\u{0}', '\u{0}']), + ('\u{194}', ['\u{263}', '\u{0}', '\u{0}']), ('\u{196}', ['\u{269}', '\u{0}', '\u{0}']), + ('\u{197}', ['\u{268}', '\u{0}', '\u{0}']), ('\u{198}', ['\u{199}', '\u{0}', '\u{0}']), + ('\u{19c}', ['\u{26f}', '\u{0}', '\u{0}']), ('\u{19d}', ['\u{272}', '\u{0}', '\u{0}']), + ('\u{19f}', ['\u{275}', '\u{0}', '\u{0}']), ('\u{1a0}', ['\u{1a1}', '\u{0}', '\u{0}']), + ('\u{1a2}', ['\u{1a3}', '\u{0}', '\u{0}']), ('\u{1a4}', ['\u{1a5}', '\u{0}', '\u{0}']), + ('\u{1a6}', ['\u{280}', '\u{0}', '\u{0}']), ('\u{1a7}', ['\u{1a8}', '\u{0}', '\u{0}']), + ('\u{1a9}', ['\u{283}', '\u{0}', '\u{0}']), ('\u{1ac}', ['\u{1ad}', '\u{0}', '\u{0}']), + ('\u{1ae}', ['\u{288}', '\u{0}', '\u{0}']), ('\u{1af}', ['\u{1b0}', '\u{0}', '\u{0}']), + ('\u{1b1}', ['\u{28a}', '\u{0}', '\u{0}']), ('\u{1b2}', ['\u{28b}', '\u{0}', '\u{0}']), + ('\u{1b3}', ['\u{1b4}', '\u{0}', '\u{0}']), ('\u{1b5}', ['\u{1b6}', '\u{0}', '\u{0}']), + ('\u{1b7}', ['\u{292}', '\u{0}', '\u{0}']), ('\u{1b8}', ['\u{1b9}', '\u{0}', '\u{0}']), + ('\u{1bc}', ['\u{1bd}', '\u{0}', '\u{0}']), ('\u{1c4}', ['\u{1c6}', '\u{0}', '\u{0}']), + ('\u{1c5}', ['\u{1c6}', '\u{0}', '\u{0}']), ('\u{1c7}', ['\u{1c9}', '\u{0}', '\u{0}']), + ('\u{1c8}', ['\u{1c9}', '\u{0}', '\u{0}']), ('\u{1ca}', ['\u{1cc}', '\u{0}', '\u{0}']), + ('\u{1cb}', ['\u{1cc}', '\u{0}', '\u{0}']), ('\u{1cd}', ['\u{1ce}', '\u{0}', '\u{0}']), + ('\u{1cf}', ['\u{1d0}', '\u{0}', '\u{0}']), ('\u{1d1}', ['\u{1d2}', '\u{0}', '\u{0}']), + ('\u{1d3}', ['\u{1d4}', '\u{0}', '\u{0}']), ('\u{1d5}', ['\u{1d6}', '\u{0}', '\u{0}']), + ('\u{1d7}', ['\u{1d8}', '\u{0}', '\u{0}']), ('\u{1d9}', ['\u{1da}', '\u{0}', '\u{0}']), + ('\u{1db}', ['\u{1dc}', '\u{0}', '\u{0}']), ('\u{1de}', ['\u{1df}', '\u{0}', '\u{0}']), + ('\u{1e0}', ['\u{1e1}', '\u{0}', '\u{0}']), ('\u{1e2}', ['\u{1e3}', '\u{0}', '\u{0}']), + ('\u{1e4}', ['\u{1e5}', '\u{0}', '\u{0}']), ('\u{1e6}', ['\u{1e7}', '\u{0}', '\u{0}']), + ('\u{1e8}', ['\u{1e9}', '\u{0}', '\u{0}']), ('\u{1ea}', ['\u{1eb}', '\u{0}', '\u{0}']), + ('\u{1ec}', ['\u{1ed}', '\u{0}', '\u{0}']), ('\u{1ee}', ['\u{1ef}', '\u{0}', '\u{0}']), + ('\u{1f1}', ['\u{1f3}', '\u{0}', '\u{0}']), ('\u{1f2}', ['\u{1f3}', '\u{0}', '\u{0}']), + ('\u{1f4}', ['\u{1f5}', '\u{0}', '\u{0}']), ('\u{1f6}', ['\u{195}', '\u{0}', '\u{0}']), + ('\u{1f7}', ['\u{1bf}', '\u{0}', '\u{0}']), ('\u{1f8}', ['\u{1f9}', '\u{0}', '\u{0}']), + ('\u{1fa}', ['\u{1fb}', '\u{0}', '\u{0}']), ('\u{1fc}', ['\u{1fd}', '\u{0}', '\u{0}']), + ('\u{1fe}', ['\u{1ff}', '\u{0}', '\u{0}']), ('\u{200}', ['\u{201}', '\u{0}', '\u{0}']), + ('\u{202}', ['\u{203}', '\u{0}', '\u{0}']), ('\u{204}', ['\u{205}', '\u{0}', '\u{0}']), + ('\u{206}', ['\u{207}', '\u{0}', '\u{0}']), 
('\u{208}', ['\u{209}', '\u{0}', '\u{0}']), + ('\u{20a}', ['\u{20b}', '\u{0}', '\u{0}']), ('\u{20c}', ['\u{20d}', '\u{0}', '\u{0}']), + ('\u{20e}', ['\u{20f}', '\u{0}', '\u{0}']), ('\u{210}', ['\u{211}', '\u{0}', '\u{0}']), + ('\u{212}', ['\u{213}', '\u{0}', '\u{0}']), ('\u{214}', ['\u{215}', '\u{0}', '\u{0}']), + ('\u{216}', ['\u{217}', '\u{0}', '\u{0}']), ('\u{218}', ['\u{219}', '\u{0}', '\u{0}']), + ('\u{21a}', ['\u{21b}', '\u{0}', '\u{0}']), ('\u{21c}', ['\u{21d}', '\u{0}', '\u{0}']), + ('\u{21e}', ['\u{21f}', '\u{0}', '\u{0}']), ('\u{220}', ['\u{19e}', '\u{0}', '\u{0}']), + ('\u{222}', ['\u{223}', '\u{0}', '\u{0}']), ('\u{224}', ['\u{225}', '\u{0}', '\u{0}']), + ('\u{226}', ['\u{227}', '\u{0}', '\u{0}']), ('\u{228}', ['\u{229}', '\u{0}', '\u{0}']), + ('\u{22a}', ['\u{22b}', '\u{0}', '\u{0}']), ('\u{22c}', ['\u{22d}', '\u{0}', '\u{0}']), + ('\u{22e}', ['\u{22f}', '\u{0}', '\u{0}']), ('\u{230}', ['\u{231}', '\u{0}', '\u{0}']), + ('\u{232}', ['\u{233}', '\u{0}', '\u{0}']), ('\u{23a}', ['\u{2c65}', '\u{0}', '\u{0}']), + ('\u{23b}', ['\u{23c}', '\u{0}', '\u{0}']), ('\u{23d}', ['\u{19a}', '\u{0}', '\u{0}']), + ('\u{23e}', ['\u{2c66}', '\u{0}', '\u{0}']), ('\u{241}', ['\u{242}', '\u{0}', '\u{0}']), + ('\u{243}', ['\u{180}', '\u{0}', '\u{0}']), ('\u{244}', ['\u{289}', '\u{0}', '\u{0}']), + ('\u{245}', ['\u{28c}', '\u{0}', '\u{0}']), ('\u{246}', ['\u{247}', '\u{0}', '\u{0}']), + ('\u{248}', ['\u{249}', '\u{0}', '\u{0}']), ('\u{24a}', ['\u{24b}', '\u{0}', '\u{0}']), + ('\u{24c}', ['\u{24d}', '\u{0}', '\u{0}']), ('\u{24e}', ['\u{24f}', '\u{0}', '\u{0}']), + ('\u{370}', ['\u{371}', '\u{0}', '\u{0}']), ('\u{372}', ['\u{373}', '\u{0}', '\u{0}']), + ('\u{376}', ['\u{377}', '\u{0}', '\u{0}']), ('\u{37f}', ['\u{3f3}', '\u{0}', '\u{0}']), + ('\u{386}', ['\u{3ac}', '\u{0}', '\u{0}']), ('\u{388}', ['\u{3ad}', '\u{0}', '\u{0}']), + ('\u{389}', ['\u{3ae}', '\u{0}', '\u{0}']), ('\u{38a}', ['\u{3af}', '\u{0}', '\u{0}']), + ('\u{38c}', ['\u{3cc}', '\u{0}', '\u{0}']), ('\u{38e}', ['\u{3cd}', '\u{0}', '\u{0}']), + ('\u{38f}', ['\u{3ce}', '\u{0}', '\u{0}']), ('\u{391}', ['\u{3b1}', '\u{0}', '\u{0}']), + ('\u{392}', ['\u{3b2}', '\u{0}', '\u{0}']), ('\u{393}', ['\u{3b3}', '\u{0}', '\u{0}']), + ('\u{394}', ['\u{3b4}', '\u{0}', '\u{0}']), ('\u{395}', ['\u{3b5}', '\u{0}', '\u{0}']), + ('\u{396}', ['\u{3b6}', '\u{0}', '\u{0}']), ('\u{397}', ['\u{3b7}', '\u{0}', '\u{0}']), + ('\u{398}', ['\u{3b8}', '\u{0}', '\u{0}']), ('\u{399}', ['\u{3b9}', '\u{0}', '\u{0}']), + ('\u{39a}', ['\u{3ba}', '\u{0}', '\u{0}']), ('\u{39b}', ['\u{3bb}', '\u{0}', '\u{0}']), + ('\u{39c}', ['\u{3bc}', '\u{0}', '\u{0}']), ('\u{39d}', ['\u{3bd}', '\u{0}', '\u{0}']), + ('\u{39e}', ['\u{3be}', '\u{0}', '\u{0}']), ('\u{39f}', ['\u{3bf}', '\u{0}', '\u{0}']), + ('\u{3a0}', ['\u{3c0}', '\u{0}', '\u{0}']), ('\u{3a1}', ['\u{3c1}', '\u{0}', '\u{0}']), + ('\u{3a3}', ['\u{3c3}', '\u{0}', '\u{0}']), ('\u{3a4}', ['\u{3c4}', '\u{0}', '\u{0}']), + ('\u{3a5}', ['\u{3c5}', '\u{0}', '\u{0}']), ('\u{3a6}', ['\u{3c6}', '\u{0}', '\u{0}']), + ('\u{3a7}', ['\u{3c7}', '\u{0}', '\u{0}']), ('\u{3a8}', ['\u{3c8}', '\u{0}', '\u{0}']), + ('\u{3a9}', ['\u{3c9}', '\u{0}', '\u{0}']), ('\u{3aa}', ['\u{3ca}', '\u{0}', '\u{0}']), + ('\u{3ab}', ['\u{3cb}', '\u{0}', '\u{0}']), ('\u{3cf}', ['\u{3d7}', '\u{0}', '\u{0}']), + ('\u{3d8}', ['\u{3d9}', '\u{0}', '\u{0}']), ('\u{3da}', ['\u{3db}', '\u{0}', '\u{0}']), + ('\u{3dc}', ['\u{3dd}', '\u{0}', '\u{0}']), ('\u{3de}', ['\u{3df}', '\u{0}', '\u{0}']), + ('\u{3e0}', ['\u{3e1}', '\u{0}', '\u{0}']), ('\u{3e2}', ['\u{3e3}', '\u{0}', 
'\u{0}']), + ('\u{3e4}', ['\u{3e5}', '\u{0}', '\u{0}']), ('\u{3e6}', ['\u{3e7}', '\u{0}', '\u{0}']), + ('\u{3e8}', ['\u{3e9}', '\u{0}', '\u{0}']), ('\u{3ea}', ['\u{3eb}', '\u{0}', '\u{0}']), + ('\u{3ec}', ['\u{3ed}', '\u{0}', '\u{0}']), ('\u{3ee}', ['\u{3ef}', '\u{0}', '\u{0}']), + ('\u{3f4}', ['\u{3b8}', '\u{0}', '\u{0}']), ('\u{3f7}', ['\u{3f8}', '\u{0}', '\u{0}']), + ('\u{3f9}', ['\u{3f2}', '\u{0}', '\u{0}']), ('\u{3fa}', ['\u{3fb}', '\u{0}', '\u{0}']), + ('\u{3fd}', ['\u{37b}', '\u{0}', '\u{0}']), ('\u{3fe}', ['\u{37c}', '\u{0}', '\u{0}']), + ('\u{3ff}', ['\u{37d}', '\u{0}', '\u{0}']), ('\u{400}', ['\u{450}', '\u{0}', '\u{0}']), + ('\u{401}', ['\u{451}', '\u{0}', '\u{0}']), ('\u{402}', ['\u{452}', '\u{0}', '\u{0}']), + ('\u{403}', ['\u{453}', '\u{0}', '\u{0}']), ('\u{404}', ['\u{454}', '\u{0}', '\u{0}']), + ('\u{405}', ['\u{455}', '\u{0}', '\u{0}']), ('\u{406}', ['\u{456}', '\u{0}', '\u{0}']), + ('\u{407}', ['\u{457}', '\u{0}', '\u{0}']), ('\u{408}', ['\u{458}', '\u{0}', '\u{0}']), + ('\u{409}', ['\u{459}', '\u{0}', '\u{0}']), ('\u{40a}', ['\u{45a}', '\u{0}', '\u{0}']), + ('\u{40b}', ['\u{45b}', '\u{0}', '\u{0}']), ('\u{40c}', ['\u{45c}', '\u{0}', '\u{0}']), + ('\u{40d}', ['\u{45d}', '\u{0}', '\u{0}']), ('\u{40e}', ['\u{45e}', '\u{0}', '\u{0}']), + ('\u{40f}', ['\u{45f}', '\u{0}', '\u{0}']), ('\u{410}', ['\u{430}', '\u{0}', '\u{0}']), + ('\u{411}', ['\u{431}', '\u{0}', '\u{0}']), ('\u{412}', ['\u{432}', '\u{0}', '\u{0}']), + ('\u{413}', ['\u{433}', '\u{0}', '\u{0}']), ('\u{414}', ['\u{434}', '\u{0}', '\u{0}']), + ('\u{415}', ['\u{435}', '\u{0}', '\u{0}']), ('\u{416}', ['\u{436}', '\u{0}', '\u{0}']), + ('\u{417}', ['\u{437}', '\u{0}', '\u{0}']), ('\u{418}', ['\u{438}', '\u{0}', '\u{0}']), + ('\u{419}', ['\u{439}', '\u{0}', '\u{0}']), ('\u{41a}', ['\u{43a}', '\u{0}', '\u{0}']), + ('\u{41b}', ['\u{43b}', '\u{0}', '\u{0}']), ('\u{41c}', ['\u{43c}', '\u{0}', '\u{0}']), + ('\u{41d}', ['\u{43d}', '\u{0}', '\u{0}']), ('\u{41e}', ['\u{43e}', '\u{0}', '\u{0}']), + ('\u{41f}', ['\u{43f}', '\u{0}', '\u{0}']), ('\u{420}', ['\u{440}', '\u{0}', '\u{0}']), + ('\u{421}', ['\u{441}', '\u{0}', '\u{0}']), ('\u{422}', ['\u{442}', '\u{0}', '\u{0}']), + ('\u{423}', ['\u{443}', '\u{0}', '\u{0}']), ('\u{424}', ['\u{444}', '\u{0}', '\u{0}']), + ('\u{425}', ['\u{445}', '\u{0}', '\u{0}']), ('\u{426}', ['\u{446}', '\u{0}', '\u{0}']), + ('\u{427}', ['\u{447}', '\u{0}', '\u{0}']), ('\u{428}', ['\u{448}', '\u{0}', '\u{0}']), + ('\u{429}', ['\u{449}', '\u{0}', '\u{0}']), ('\u{42a}', ['\u{44a}', '\u{0}', '\u{0}']), + ('\u{42b}', ['\u{44b}', '\u{0}', '\u{0}']), ('\u{42c}', ['\u{44c}', '\u{0}', '\u{0}']), + ('\u{42d}', ['\u{44d}', '\u{0}', '\u{0}']), ('\u{42e}', ['\u{44e}', '\u{0}', '\u{0}']), + ('\u{42f}', ['\u{44f}', '\u{0}', '\u{0}']), ('\u{460}', ['\u{461}', '\u{0}', '\u{0}']), + ('\u{462}', ['\u{463}', '\u{0}', '\u{0}']), ('\u{464}', ['\u{465}', '\u{0}', '\u{0}']), + ('\u{466}', ['\u{467}', '\u{0}', '\u{0}']), ('\u{468}', ['\u{469}', '\u{0}', '\u{0}']), + ('\u{46a}', ['\u{46b}', '\u{0}', '\u{0}']), ('\u{46c}', ['\u{46d}', '\u{0}', '\u{0}']), + ('\u{46e}', ['\u{46f}', '\u{0}', '\u{0}']), ('\u{470}', ['\u{471}', '\u{0}', '\u{0}']), + ('\u{472}', ['\u{473}', '\u{0}', '\u{0}']), ('\u{474}', ['\u{475}', '\u{0}', '\u{0}']), + ('\u{476}', ['\u{477}', '\u{0}', '\u{0}']), ('\u{478}', ['\u{479}', '\u{0}', '\u{0}']), + ('\u{47a}', ['\u{47b}', '\u{0}', '\u{0}']), ('\u{47c}', ['\u{47d}', '\u{0}', '\u{0}']), + ('\u{47e}', ['\u{47f}', '\u{0}', '\u{0}']), ('\u{480}', ['\u{481}', '\u{0}', '\u{0}']), + ('\u{48a}', ['\u{48b}', 
'\u{0}', '\u{0}']), ('\u{48c}', ['\u{48d}', '\u{0}', '\u{0}']), + ('\u{48e}', ['\u{48f}', '\u{0}', '\u{0}']), ('\u{490}', ['\u{491}', '\u{0}', '\u{0}']), + ('\u{492}', ['\u{493}', '\u{0}', '\u{0}']), ('\u{494}', ['\u{495}', '\u{0}', '\u{0}']), + ('\u{496}', ['\u{497}', '\u{0}', '\u{0}']), ('\u{498}', ['\u{499}', '\u{0}', '\u{0}']), + ('\u{49a}', ['\u{49b}', '\u{0}', '\u{0}']), ('\u{49c}', ['\u{49d}', '\u{0}', '\u{0}']), + ('\u{49e}', ['\u{49f}', '\u{0}', '\u{0}']), ('\u{4a0}', ['\u{4a1}', '\u{0}', '\u{0}']), + ('\u{4a2}', ['\u{4a3}', '\u{0}', '\u{0}']), ('\u{4a4}', ['\u{4a5}', '\u{0}', '\u{0}']), + ('\u{4a6}', ['\u{4a7}', '\u{0}', '\u{0}']), ('\u{4a8}', ['\u{4a9}', '\u{0}', '\u{0}']), + ('\u{4aa}', ['\u{4ab}', '\u{0}', '\u{0}']), ('\u{4ac}', ['\u{4ad}', '\u{0}', '\u{0}']), + ('\u{4ae}', ['\u{4af}', '\u{0}', '\u{0}']), ('\u{4b0}', ['\u{4b1}', '\u{0}', '\u{0}']), + ('\u{4b2}', ['\u{4b3}', '\u{0}', '\u{0}']), ('\u{4b4}', ['\u{4b5}', '\u{0}', '\u{0}']), + ('\u{4b6}', ['\u{4b7}', '\u{0}', '\u{0}']), ('\u{4b8}', ['\u{4b9}', '\u{0}', '\u{0}']), + ('\u{4ba}', ['\u{4bb}', '\u{0}', '\u{0}']), ('\u{4bc}', ['\u{4bd}', '\u{0}', '\u{0}']), + ('\u{4be}', ['\u{4bf}', '\u{0}', '\u{0}']), ('\u{4c0}', ['\u{4cf}', '\u{0}', '\u{0}']), + ('\u{4c1}', ['\u{4c2}', '\u{0}', '\u{0}']), ('\u{4c3}', ['\u{4c4}', '\u{0}', '\u{0}']), + ('\u{4c5}', ['\u{4c6}', '\u{0}', '\u{0}']), ('\u{4c7}', ['\u{4c8}', '\u{0}', '\u{0}']), + ('\u{4c9}', ['\u{4ca}', '\u{0}', '\u{0}']), ('\u{4cb}', ['\u{4cc}', '\u{0}', '\u{0}']), + ('\u{4cd}', ['\u{4ce}', '\u{0}', '\u{0}']), ('\u{4d0}', ['\u{4d1}', '\u{0}', '\u{0}']), + ('\u{4d2}', ['\u{4d3}', '\u{0}', '\u{0}']), ('\u{4d4}', ['\u{4d5}', '\u{0}', '\u{0}']), + ('\u{4d6}', ['\u{4d7}', '\u{0}', '\u{0}']), ('\u{4d8}', ['\u{4d9}', '\u{0}', '\u{0}']), + ('\u{4da}', ['\u{4db}', '\u{0}', '\u{0}']), ('\u{4dc}', ['\u{4dd}', '\u{0}', '\u{0}']), + ('\u{4de}', ['\u{4df}', '\u{0}', '\u{0}']), ('\u{4e0}', ['\u{4e1}', '\u{0}', '\u{0}']), + ('\u{4e2}', ['\u{4e3}', '\u{0}', '\u{0}']), ('\u{4e4}', ['\u{4e5}', '\u{0}', '\u{0}']), + ('\u{4e6}', ['\u{4e7}', '\u{0}', '\u{0}']), ('\u{4e8}', ['\u{4e9}', '\u{0}', '\u{0}']), + ('\u{4ea}', ['\u{4eb}', '\u{0}', '\u{0}']), ('\u{4ec}', ['\u{4ed}', '\u{0}', '\u{0}']), + ('\u{4ee}', ['\u{4ef}', '\u{0}', '\u{0}']), ('\u{4f0}', ['\u{4f1}', '\u{0}', '\u{0}']), + ('\u{4f2}', ['\u{4f3}', '\u{0}', '\u{0}']), ('\u{4f4}', ['\u{4f5}', '\u{0}', '\u{0}']), + ('\u{4f6}', ['\u{4f7}', '\u{0}', '\u{0}']), ('\u{4f8}', ['\u{4f9}', '\u{0}', '\u{0}']), + ('\u{4fa}', ['\u{4fb}', '\u{0}', '\u{0}']), ('\u{4fc}', ['\u{4fd}', '\u{0}', '\u{0}']), + ('\u{4fe}', ['\u{4ff}', '\u{0}', '\u{0}']), ('\u{500}', ['\u{501}', '\u{0}', '\u{0}']), + ('\u{502}', ['\u{503}', '\u{0}', '\u{0}']), ('\u{504}', ['\u{505}', '\u{0}', '\u{0}']), + ('\u{506}', ['\u{507}', '\u{0}', '\u{0}']), ('\u{508}', ['\u{509}', '\u{0}', '\u{0}']), + ('\u{50a}', ['\u{50b}', '\u{0}', '\u{0}']), ('\u{50c}', ['\u{50d}', '\u{0}', '\u{0}']), + ('\u{50e}', ['\u{50f}', '\u{0}', '\u{0}']), ('\u{510}', ['\u{511}', '\u{0}', '\u{0}']), + ('\u{512}', ['\u{513}', '\u{0}', '\u{0}']), ('\u{514}', ['\u{515}', '\u{0}', '\u{0}']), + ('\u{516}', ['\u{517}', '\u{0}', '\u{0}']), ('\u{518}', ['\u{519}', '\u{0}', '\u{0}']), + ('\u{51a}', ['\u{51b}', '\u{0}', '\u{0}']), ('\u{51c}', ['\u{51d}', '\u{0}', '\u{0}']), + ('\u{51e}', ['\u{51f}', '\u{0}', '\u{0}']), ('\u{520}', ['\u{521}', '\u{0}', '\u{0}']), + ('\u{522}', ['\u{523}', '\u{0}', '\u{0}']), ('\u{524}', ['\u{525}', '\u{0}', '\u{0}']), + ('\u{526}', ['\u{527}', '\u{0}', '\u{0}']), ('\u{528}', ['\u{529}', 
'\u{0}', '\u{0}']), + ('\u{52a}', ['\u{52b}', '\u{0}', '\u{0}']), ('\u{52c}', ['\u{52d}', '\u{0}', '\u{0}']), + ('\u{52e}', ['\u{52f}', '\u{0}', '\u{0}']), ('\u{531}', ['\u{561}', '\u{0}', '\u{0}']), + ('\u{532}', ['\u{562}', '\u{0}', '\u{0}']), ('\u{533}', ['\u{563}', '\u{0}', '\u{0}']), + ('\u{534}', ['\u{564}', '\u{0}', '\u{0}']), ('\u{535}', ['\u{565}', '\u{0}', '\u{0}']), + ('\u{536}', ['\u{566}', '\u{0}', '\u{0}']), ('\u{537}', ['\u{567}', '\u{0}', '\u{0}']), + ('\u{538}', ['\u{568}', '\u{0}', '\u{0}']), ('\u{539}', ['\u{569}', '\u{0}', '\u{0}']), + ('\u{53a}', ['\u{56a}', '\u{0}', '\u{0}']), ('\u{53b}', ['\u{56b}', '\u{0}', '\u{0}']), + ('\u{53c}', ['\u{56c}', '\u{0}', '\u{0}']), ('\u{53d}', ['\u{56d}', '\u{0}', '\u{0}']), + ('\u{53e}', ['\u{56e}', '\u{0}', '\u{0}']), ('\u{53f}', ['\u{56f}', '\u{0}', '\u{0}']), + ('\u{540}', ['\u{570}', '\u{0}', '\u{0}']), ('\u{541}', ['\u{571}', '\u{0}', '\u{0}']), + ('\u{542}', ['\u{572}', '\u{0}', '\u{0}']), ('\u{543}', ['\u{573}', '\u{0}', '\u{0}']), + ('\u{544}', ['\u{574}', '\u{0}', '\u{0}']), ('\u{545}', ['\u{575}', '\u{0}', '\u{0}']), + ('\u{546}', ['\u{576}', '\u{0}', '\u{0}']), ('\u{547}', ['\u{577}', '\u{0}', '\u{0}']), + ('\u{548}', ['\u{578}', '\u{0}', '\u{0}']), ('\u{549}', ['\u{579}', '\u{0}', '\u{0}']), + ('\u{54a}', ['\u{57a}', '\u{0}', '\u{0}']), ('\u{54b}', ['\u{57b}', '\u{0}', '\u{0}']), + ('\u{54c}', ['\u{57c}', '\u{0}', '\u{0}']), ('\u{54d}', ['\u{57d}', '\u{0}', '\u{0}']), + ('\u{54e}', ['\u{57e}', '\u{0}', '\u{0}']), ('\u{54f}', ['\u{57f}', '\u{0}', '\u{0}']), + ('\u{550}', ['\u{580}', '\u{0}', '\u{0}']), ('\u{551}', ['\u{581}', '\u{0}', '\u{0}']), + ('\u{552}', ['\u{582}', '\u{0}', '\u{0}']), ('\u{553}', ['\u{583}', '\u{0}', '\u{0}']), + ('\u{554}', ['\u{584}', '\u{0}', '\u{0}']), ('\u{555}', ['\u{585}', '\u{0}', '\u{0}']), + ('\u{556}', ['\u{586}', '\u{0}', '\u{0}']), ('\u{10a0}', ['\u{2d00}', '\u{0}', '\u{0}']), + ('\u{10a1}', ['\u{2d01}', '\u{0}', '\u{0}']), ('\u{10a2}', ['\u{2d02}', '\u{0}', '\u{0}']), + ('\u{10a3}', ['\u{2d03}', '\u{0}', '\u{0}']), ('\u{10a4}', ['\u{2d04}', '\u{0}', '\u{0}']), + ('\u{10a5}', ['\u{2d05}', '\u{0}', '\u{0}']), ('\u{10a6}', ['\u{2d06}', '\u{0}', '\u{0}']), + ('\u{10a7}', ['\u{2d07}', '\u{0}', '\u{0}']), ('\u{10a8}', ['\u{2d08}', '\u{0}', '\u{0}']), + ('\u{10a9}', ['\u{2d09}', '\u{0}', '\u{0}']), ('\u{10aa}', ['\u{2d0a}', '\u{0}', '\u{0}']), + ('\u{10ab}', ['\u{2d0b}', '\u{0}', '\u{0}']), ('\u{10ac}', ['\u{2d0c}', '\u{0}', '\u{0}']), + ('\u{10ad}', ['\u{2d0d}', '\u{0}', '\u{0}']), ('\u{10ae}', ['\u{2d0e}', '\u{0}', '\u{0}']), + ('\u{10af}', ['\u{2d0f}', '\u{0}', '\u{0}']), ('\u{10b0}', ['\u{2d10}', '\u{0}', '\u{0}']), + ('\u{10b1}', ['\u{2d11}', '\u{0}', '\u{0}']), ('\u{10b2}', ['\u{2d12}', '\u{0}', '\u{0}']), + ('\u{10b3}', ['\u{2d13}', '\u{0}', '\u{0}']), ('\u{10b4}', ['\u{2d14}', '\u{0}', '\u{0}']), + ('\u{10b5}', ['\u{2d15}', '\u{0}', '\u{0}']), ('\u{10b6}', ['\u{2d16}', '\u{0}', '\u{0}']), + ('\u{10b7}', ['\u{2d17}', '\u{0}', '\u{0}']), ('\u{10b8}', ['\u{2d18}', '\u{0}', '\u{0}']), + ('\u{10b9}', ['\u{2d19}', '\u{0}', '\u{0}']), ('\u{10ba}', ['\u{2d1a}', '\u{0}', '\u{0}']), + ('\u{10bb}', ['\u{2d1b}', '\u{0}', '\u{0}']), ('\u{10bc}', ['\u{2d1c}', '\u{0}', '\u{0}']), + ('\u{10bd}', ['\u{2d1d}', '\u{0}', '\u{0}']), ('\u{10be}', ['\u{2d1e}', '\u{0}', '\u{0}']), + ('\u{10bf}', ['\u{2d1f}', '\u{0}', '\u{0}']), ('\u{10c0}', ['\u{2d20}', '\u{0}', '\u{0}']), + ('\u{10c1}', ['\u{2d21}', '\u{0}', '\u{0}']), ('\u{10c2}', ['\u{2d22}', '\u{0}', '\u{0}']), + ('\u{10c3}', ['\u{2d23}', '\u{0}', 
'\u{0}']), ('\u{10c4}', ['\u{2d24}', '\u{0}', '\u{0}']), + ('\u{10c5}', ['\u{2d25}', '\u{0}', '\u{0}']), ('\u{10c7}', ['\u{2d27}', '\u{0}', '\u{0}']), + ('\u{10cd}', ['\u{2d2d}', '\u{0}', '\u{0}']), ('\u{13a0}', ['\u{ab70}', '\u{0}', '\u{0}']), + ('\u{13a1}', ['\u{ab71}', '\u{0}', '\u{0}']), ('\u{13a2}', ['\u{ab72}', '\u{0}', '\u{0}']), + ('\u{13a3}', ['\u{ab73}', '\u{0}', '\u{0}']), ('\u{13a4}', ['\u{ab74}', '\u{0}', '\u{0}']), + ('\u{13a5}', ['\u{ab75}', '\u{0}', '\u{0}']), ('\u{13a6}', ['\u{ab76}', '\u{0}', '\u{0}']), + ('\u{13a7}', ['\u{ab77}', '\u{0}', '\u{0}']), ('\u{13a8}', ['\u{ab78}', '\u{0}', '\u{0}']), + ('\u{13a9}', ['\u{ab79}', '\u{0}', '\u{0}']), ('\u{13aa}', ['\u{ab7a}', '\u{0}', '\u{0}']), + ('\u{13ab}', ['\u{ab7b}', '\u{0}', '\u{0}']), ('\u{13ac}', ['\u{ab7c}', '\u{0}', '\u{0}']), + ('\u{13ad}', ['\u{ab7d}', '\u{0}', '\u{0}']), ('\u{13ae}', ['\u{ab7e}', '\u{0}', '\u{0}']), + ('\u{13af}', ['\u{ab7f}', '\u{0}', '\u{0}']), ('\u{13b0}', ['\u{ab80}', '\u{0}', '\u{0}']), + ('\u{13b1}', ['\u{ab81}', '\u{0}', '\u{0}']), ('\u{13b2}', ['\u{ab82}', '\u{0}', '\u{0}']), + ('\u{13b3}', ['\u{ab83}', '\u{0}', '\u{0}']), ('\u{13b4}', ['\u{ab84}', '\u{0}', '\u{0}']), + ('\u{13b5}', ['\u{ab85}', '\u{0}', '\u{0}']), ('\u{13b6}', ['\u{ab86}', '\u{0}', '\u{0}']), + ('\u{13b7}', ['\u{ab87}', '\u{0}', '\u{0}']), ('\u{13b8}', ['\u{ab88}', '\u{0}', '\u{0}']), + ('\u{13b9}', ['\u{ab89}', '\u{0}', '\u{0}']), ('\u{13ba}', ['\u{ab8a}', '\u{0}', '\u{0}']), + ('\u{13bb}', ['\u{ab8b}', '\u{0}', '\u{0}']), ('\u{13bc}', ['\u{ab8c}', '\u{0}', '\u{0}']), + ('\u{13bd}', ['\u{ab8d}', '\u{0}', '\u{0}']), ('\u{13be}', ['\u{ab8e}', '\u{0}', '\u{0}']), + ('\u{13bf}', ['\u{ab8f}', '\u{0}', '\u{0}']), ('\u{13c0}', ['\u{ab90}', '\u{0}', '\u{0}']), + ('\u{13c1}', ['\u{ab91}', '\u{0}', '\u{0}']), ('\u{13c2}', ['\u{ab92}', '\u{0}', '\u{0}']), + ('\u{13c3}', ['\u{ab93}', '\u{0}', '\u{0}']), ('\u{13c4}', ['\u{ab94}', '\u{0}', '\u{0}']), + ('\u{13c5}', ['\u{ab95}', '\u{0}', '\u{0}']), ('\u{13c6}', ['\u{ab96}', '\u{0}', '\u{0}']), + ('\u{13c7}', ['\u{ab97}', '\u{0}', '\u{0}']), ('\u{13c8}', ['\u{ab98}', '\u{0}', '\u{0}']), + ('\u{13c9}', ['\u{ab99}', '\u{0}', '\u{0}']), ('\u{13ca}', ['\u{ab9a}', '\u{0}', '\u{0}']), + ('\u{13cb}', ['\u{ab9b}', '\u{0}', '\u{0}']), ('\u{13cc}', ['\u{ab9c}', '\u{0}', '\u{0}']), + ('\u{13cd}', ['\u{ab9d}', '\u{0}', '\u{0}']), ('\u{13ce}', ['\u{ab9e}', '\u{0}', '\u{0}']), + ('\u{13cf}', ['\u{ab9f}', '\u{0}', '\u{0}']), ('\u{13d0}', ['\u{aba0}', '\u{0}', '\u{0}']), + ('\u{13d1}', ['\u{aba1}', '\u{0}', '\u{0}']), ('\u{13d2}', ['\u{aba2}', '\u{0}', '\u{0}']), + ('\u{13d3}', ['\u{aba3}', '\u{0}', '\u{0}']), ('\u{13d4}', ['\u{aba4}', '\u{0}', '\u{0}']), + ('\u{13d5}', ['\u{aba5}', '\u{0}', '\u{0}']), ('\u{13d6}', ['\u{aba6}', '\u{0}', '\u{0}']), + ('\u{13d7}', ['\u{aba7}', '\u{0}', '\u{0}']), ('\u{13d8}', ['\u{aba8}', '\u{0}', '\u{0}']), + ('\u{13d9}', ['\u{aba9}', '\u{0}', '\u{0}']), ('\u{13da}', ['\u{abaa}', '\u{0}', '\u{0}']), + ('\u{13db}', ['\u{abab}', '\u{0}', '\u{0}']), ('\u{13dc}', ['\u{abac}', '\u{0}', '\u{0}']), + ('\u{13dd}', ['\u{abad}', '\u{0}', '\u{0}']), ('\u{13de}', ['\u{abae}', '\u{0}', '\u{0}']), + ('\u{13df}', ['\u{abaf}', '\u{0}', '\u{0}']), ('\u{13e0}', ['\u{abb0}', '\u{0}', '\u{0}']), + ('\u{13e1}', ['\u{abb1}', '\u{0}', '\u{0}']), ('\u{13e2}', ['\u{abb2}', '\u{0}', '\u{0}']), + ('\u{13e3}', ['\u{abb3}', '\u{0}', '\u{0}']), ('\u{13e4}', ['\u{abb4}', '\u{0}', '\u{0}']), + ('\u{13e5}', ['\u{abb5}', '\u{0}', '\u{0}']), ('\u{13e6}', ['\u{abb6}', '\u{0}', '\u{0}']), + ('\u{13e7}', 
['\u{abb7}', '\u{0}', '\u{0}']), ('\u{13e8}', ['\u{abb8}', '\u{0}', '\u{0}']), + ('\u{13e9}', ['\u{abb9}', '\u{0}', '\u{0}']), ('\u{13ea}', ['\u{abba}', '\u{0}', '\u{0}']), + ('\u{13eb}', ['\u{abbb}', '\u{0}', '\u{0}']), ('\u{13ec}', ['\u{abbc}', '\u{0}', '\u{0}']), + ('\u{13ed}', ['\u{abbd}', '\u{0}', '\u{0}']), ('\u{13ee}', ['\u{abbe}', '\u{0}', '\u{0}']), + ('\u{13ef}', ['\u{abbf}', '\u{0}', '\u{0}']), ('\u{13f0}', ['\u{13f8}', '\u{0}', '\u{0}']), + ('\u{13f1}', ['\u{13f9}', '\u{0}', '\u{0}']), ('\u{13f2}', ['\u{13fa}', '\u{0}', '\u{0}']), + ('\u{13f3}', ['\u{13fb}', '\u{0}', '\u{0}']), ('\u{13f4}', ['\u{13fc}', '\u{0}', '\u{0}']), + ('\u{13f5}', ['\u{13fd}', '\u{0}', '\u{0}']), ('\u{1c89}', ['\u{1c8a}', '\u{0}', '\u{0}']), + ('\u{1c90}', ['\u{10d0}', '\u{0}', '\u{0}']), ('\u{1c91}', ['\u{10d1}', '\u{0}', '\u{0}']), + ('\u{1c92}', ['\u{10d2}', '\u{0}', '\u{0}']), ('\u{1c93}', ['\u{10d3}', '\u{0}', '\u{0}']), + ('\u{1c94}', ['\u{10d4}', '\u{0}', '\u{0}']), ('\u{1c95}', ['\u{10d5}', '\u{0}', '\u{0}']), + ('\u{1c96}', ['\u{10d6}', '\u{0}', '\u{0}']), ('\u{1c97}', ['\u{10d7}', '\u{0}', '\u{0}']), + ('\u{1c98}', ['\u{10d8}', '\u{0}', '\u{0}']), ('\u{1c99}', ['\u{10d9}', '\u{0}', '\u{0}']), + ('\u{1c9a}', ['\u{10da}', '\u{0}', '\u{0}']), ('\u{1c9b}', ['\u{10db}', '\u{0}', '\u{0}']), + ('\u{1c9c}', ['\u{10dc}', '\u{0}', '\u{0}']), ('\u{1c9d}', ['\u{10dd}', '\u{0}', '\u{0}']), + ('\u{1c9e}', ['\u{10de}', '\u{0}', '\u{0}']), ('\u{1c9f}', ['\u{10df}', '\u{0}', '\u{0}']), + ('\u{1ca0}', ['\u{10e0}', '\u{0}', '\u{0}']), ('\u{1ca1}', ['\u{10e1}', '\u{0}', '\u{0}']), + ('\u{1ca2}', ['\u{10e2}', '\u{0}', '\u{0}']), ('\u{1ca3}', ['\u{10e3}', '\u{0}', '\u{0}']), + ('\u{1ca4}', ['\u{10e4}', '\u{0}', '\u{0}']), ('\u{1ca5}', ['\u{10e5}', '\u{0}', '\u{0}']), + ('\u{1ca6}', ['\u{10e6}', '\u{0}', '\u{0}']), ('\u{1ca7}', ['\u{10e7}', '\u{0}', '\u{0}']), + ('\u{1ca8}', ['\u{10e8}', '\u{0}', '\u{0}']), ('\u{1ca9}', ['\u{10e9}', '\u{0}', '\u{0}']), + ('\u{1caa}', ['\u{10ea}', '\u{0}', '\u{0}']), ('\u{1cab}', ['\u{10eb}', '\u{0}', '\u{0}']), + ('\u{1cac}', ['\u{10ec}', '\u{0}', '\u{0}']), ('\u{1cad}', ['\u{10ed}', '\u{0}', '\u{0}']), + ('\u{1cae}', ['\u{10ee}', '\u{0}', '\u{0}']), ('\u{1caf}', ['\u{10ef}', '\u{0}', '\u{0}']), + ('\u{1cb0}', ['\u{10f0}', '\u{0}', '\u{0}']), ('\u{1cb1}', ['\u{10f1}', '\u{0}', '\u{0}']), + ('\u{1cb2}', ['\u{10f2}', '\u{0}', '\u{0}']), ('\u{1cb3}', ['\u{10f3}', '\u{0}', '\u{0}']), + ('\u{1cb4}', ['\u{10f4}', '\u{0}', '\u{0}']), ('\u{1cb5}', ['\u{10f5}', '\u{0}', '\u{0}']), + ('\u{1cb6}', ['\u{10f6}', '\u{0}', '\u{0}']), ('\u{1cb7}', ['\u{10f7}', '\u{0}', '\u{0}']), + ('\u{1cb8}', ['\u{10f8}', '\u{0}', '\u{0}']), ('\u{1cb9}', ['\u{10f9}', '\u{0}', '\u{0}']), + ('\u{1cba}', ['\u{10fa}', '\u{0}', '\u{0}']), ('\u{1cbd}', ['\u{10fd}', '\u{0}', '\u{0}']), + ('\u{1cbe}', ['\u{10fe}', '\u{0}', '\u{0}']), ('\u{1cbf}', ['\u{10ff}', '\u{0}', '\u{0}']), + ('\u{1e00}', ['\u{1e01}', '\u{0}', '\u{0}']), ('\u{1e02}', ['\u{1e03}', '\u{0}', '\u{0}']), + ('\u{1e04}', ['\u{1e05}', '\u{0}', '\u{0}']), ('\u{1e06}', ['\u{1e07}', '\u{0}', '\u{0}']), + ('\u{1e08}', ['\u{1e09}', '\u{0}', '\u{0}']), ('\u{1e0a}', ['\u{1e0b}', '\u{0}', '\u{0}']), + ('\u{1e0c}', ['\u{1e0d}', '\u{0}', '\u{0}']), ('\u{1e0e}', ['\u{1e0f}', '\u{0}', '\u{0}']), + ('\u{1e10}', ['\u{1e11}', '\u{0}', '\u{0}']), ('\u{1e12}', ['\u{1e13}', '\u{0}', '\u{0}']), + ('\u{1e14}', ['\u{1e15}', '\u{0}', '\u{0}']), ('\u{1e16}', ['\u{1e17}', '\u{0}', '\u{0}']), + ('\u{1e18}', ['\u{1e19}', '\u{0}', '\u{0}']), ('\u{1e1a}', ['\u{1e1b}', '\u{0}', 
'\u{0}']), + ('\u{1e1c}', ['\u{1e1d}', '\u{0}', '\u{0}']), ('\u{1e1e}', ['\u{1e1f}', '\u{0}', '\u{0}']), + ('\u{1e20}', ['\u{1e21}', '\u{0}', '\u{0}']), ('\u{1e22}', ['\u{1e23}', '\u{0}', '\u{0}']), + ('\u{1e24}', ['\u{1e25}', '\u{0}', '\u{0}']), ('\u{1e26}', ['\u{1e27}', '\u{0}', '\u{0}']), + ('\u{1e28}', ['\u{1e29}', '\u{0}', '\u{0}']), ('\u{1e2a}', ['\u{1e2b}', '\u{0}', '\u{0}']), + ('\u{1e2c}', ['\u{1e2d}', '\u{0}', '\u{0}']), ('\u{1e2e}', ['\u{1e2f}', '\u{0}', '\u{0}']), + ('\u{1e30}', ['\u{1e31}', '\u{0}', '\u{0}']), ('\u{1e32}', ['\u{1e33}', '\u{0}', '\u{0}']), + ('\u{1e34}', ['\u{1e35}', '\u{0}', '\u{0}']), ('\u{1e36}', ['\u{1e37}', '\u{0}', '\u{0}']), + ('\u{1e38}', ['\u{1e39}', '\u{0}', '\u{0}']), ('\u{1e3a}', ['\u{1e3b}', '\u{0}', '\u{0}']), + ('\u{1e3c}', ['\u{1e3d}', '\u{0}', '\u{0}']), ('\u{1e3e}', ['\u{1e3f}', '\u{0}', '\u{0}']), + ('\u{1e40}', ['\u{1e41}', '\u{0}', '\u{0}']), ('\u{1e42}', ['\u{1e43}', '\u{0}', '\u{0}']), + ('\u{1e44}', ['\u{1e45}', '\u{0}', '\u{0}']), ('\u{1e46}', ['\u{1e47}', '\u{0}', '\u{0}']), + ('\u{1e48}', ['\u{1e49}', '\u{0}', '\u{0}']), ('\u{1e4a}', ['\u{1e4b}', '\u{0}', '\u{0}']), + ('\u{1e4c}', ['\u{1e4d}', '\u{0}', '\u{0}']), ('\u{1e4e}', ['\u{1e4f}', '\u{0}', '\u{0}']), + ('\u{1e50}', ['\u{1e51}', '\u{0}', '\u{0}']), ('\u{1e52}', ['\u{1e53}', '\u{0}', '\u{0}']), + ('\u{1e54}', ['\u{1e55}', '\u{0}', '\u{0}']), ('\u{1e56}', ['\u{1e57}', '\u{0}', '\u{0}']), + ('\u{1e58}', ['\u{1e59}', '\u{0}', '\u{0}']), ('\u{1e5a}', ['\u{1e5b}', '\u{0}', '\u{0}']), + ('\u{1e5c}', ['\u{1e5d}', '\u{0}', '\u{0}']), ('\u{1e5e}', ['\u{1e5f}', '\u{0}', '\u{0}']), + ('\u{1e60}', ['\u{1e61}', '\u{0}', '\u{0}']), ('\u{1e62}', ['\u{1e63}', '\u{0}', '\u{0}']), + ('\u{1e64}', ['\u{1e65}', '\u{0}', '\u{0}']), ('\u{1e66}', ['\u{1e67}', '\u{0}', '\u{0}']), + ('\u{1e68}', ['\u{1e69}', '\u{0}', '\u{0}']), ('\u{1e6a}', ['\u{1e6b}', '\u{0}', '\u{0}']), + ('\u{1e6c}', ['\u{1e6d}', '\u{0}', '\u{0}']), ('\u{1e6e}', ['\u{1e6f}', '\u{0}', '\u{0}']), + ('\u{1e70}', ['\u{1e71}', '\u{0}', '\u{0}']), ('\u{1e72}', ['\u{1e73}', '\u{0}', '\u{0}']), + ('\u{1e74}', ['\u{1e75}', '\u{0}', '\u{0}']), ('\u{1e76}', ['\u{1e77}', '\u{0}', '\u{0}']), + ('\u{1e78}', ['\u{1e79}', '\u{0}', '\u{0}']), ('\u{1e7a}', ['\u{1e7b}', '\u{0}', '\u{0}']), + ('\u{1e7c}', ['\u{1e7d}', '\u{0}', '\u{0}']), ('\u{1e7e}', ['\u{1e7f}', '\u{0}', '\u{0}']), + ('\u{1e80}', ['\u{1e81}', '\u{0}', '\u{0}']), ('\u{1e82}', ['\u{1e83}', '\u{0}', '\u{0}']), + ('\u{1e84}', ['\u{1e85}', '\u{0}', '\u{0}']), ('\u{1e86}', ['\u{1e87}', '\u{0}', '\u{0}']), + ('\u{1e88}', ['\u{1e89}', '\u{0}', '\u{0}']), ('\u{1e8a}', ['\u{1e8b}', '\u{0}', '\u{0}']), + ('\u{1e8c}', ['\u{1e8d}', '\u{0}', '\u{0}']), ('\u{1e8e}', ['\u{1e8f}', '\u{0}', '\u{0}']), + ('\u{1e90}', ['\u{1e91}', '\u{0}', '\u{0}']), ('\u{1e92}', ['\u{1e93}', '\u{0}', '\u{0}']), + ('\u{1e94}', ['\u{1e95}', '\u{0}', '\u{0}']), ('\u{1e9e}', ['\u{df}', '\u{0}', '\u{0}']), + ('\u{1ea0}', ['\u{1ea1}', '\u{0}', '\u{0}']), ('\u{1ea2}', ['\u{1ea3}', '\u{0}', '\u{0}']), + ('\u{1ea4}', ['\u{1ea5}', '\u{0}', '\u{0}']), ('\u{1ea6}', ['\u{1ea7}', '\u{0}', '\u{0}']), + ('\u{1ea8}', ['\u{1ea9}', '\u{0}', '\u{0}']), ('\u{1eaa}', ['\u{1eab}', '\u{0}', '\u{0}']), + ('\u{1eac}', ['\u{1ead}', '\u{0}', '\u{0}']), ('\u{1eae}', ['\u{1eaf}', '\u{0}', '\u{0}']), + ('\u{1eb0}', ['\u{1eb1}', '\u{0}', '\u{0}']), ('\u{1eb2}', ['\u{1eb3}', '\u{0}', '\u{0}']), + ('\u{1eb4}', ['\u{1eb5}', '\u{0}', '\u{0}']), ('\u{1eb6}', ['\u{1eb7}', '\u{0}', '\u{0}']), + ('\u{1eb8}', ['\u{1eb9}', '\u{0}', '\u{0}']), ('\u{1eba}', 
['\u{1ebb}', '\u{0}', '\u{0}']), + ('\u{1ebc}', ['\u{1ebd}', '\u{0}', '\u{0}']), ('\u{1ebe}', ['\u{1ebf}', '\u{0}', '\u{0}']), + ('\u{1ec0}', ['\u{1ec1}', '\u{0}', '\u{0}']), ('\u{1ec2}', ['\u{1ec3}', '\u{0}', '\u{0}']), + ('\u{1ec4}', ['\u{1ec5}', '\u{0}', '\u{0}']), ('\u{1ec6}', ['\u{1ec7}', '\u{0}', '\u{0}']), + ('\u{1ec8}', ['\u{1ec9}', '\u{0}', '\u{0}']), ('\u{1eca}', ['\u{1ecb}', '\u{0}', '\u{0}']), + ('\u{1ecc}', ['\u{1ecd}', '\u{0}', '\u{0}']), ('\u{1ece}', ['\u{1ecf}', '\u{0}', '\u{0}']), + ('\u{1ed0}', ['\u{1ed1}', '\u{0}', '\u{0}']), ('\u{1ed2}', ['\u{1ed3}', '\u{0}', '\u{0}']), + ('\u{1ed4}', ['\u{1ed5}', '\u{0}', '\u{0}']), ('\u{1ed6}', ['\u{1ed7}', '\u{0}', '\u{0}']), + ('\u{1ed8}', ['\u{1ed9}', '\u{0}', '\u{0}']), ('\u{1eda}', ['\u{1edb}', '\u{0}', '\u{0}']), + ('\u{1edc}', ['\u{1edd}', '\u{0}', '\u{0}']), ('\u{1ede}', ['\u{1edf}', '\u{0}', '\u{0}']), + ('\u{1ee0}', ['\u{1ee1}', '\u{0}', '\u{0}']), ('\u{1ee2}', ['\u{1ee3}', '\u{0}', '\u{0}']), + ('\u{1ee4}', ['\u{1ee5}', '\u{0}', '\u{0}']), ('\u{1ee6}', ['\u{1ee7}', '\u{0}', '\u{0}']), + ('\u{1ee8}', ['\u{1ee9}', '\u{0}', '\u{0}']), ('\u{1eea}', ['\u{1eeb}', '\u{0}', '\u{0}']), + ('\u{1eec}', ['\u{1eed}', '\u{0}', '\u{0}']), ('\u{1eee}', ['\u{1eef}', '\u{0}', '\u{0}']), + ('\u{1ef0}', ['\u{1ef1}', '\u{0}', '\u{0}']), ('\u{1ef2}', ['\u{1ef3}', '\u{0}', '\u{0}']), + ('\u{1ef4}', ['\u{1ef5}', '\u{0}', '\u{0}']), ('\u{1ef6}', ['\u{1ef7}', '\u{0}', '\u{0}']), + ('\u{1ef8}', ['\u{1ef9}', '\u{0}', '\u{0}']), ('\u{1efa}', ['\u{1efb}', '\u{0}', '\u{0}']), + ('\u{1efc}', ['\u{1efd}', '\u{0}', '\u{0}']), ('\u{1efe}', ['\u{1eff}', '\u{0}', '\u{0}']), + ('\u{1f08}', ['\u{1f00}', '\u{0}', '\u{0}']), ('\u{1f09}', ['\u{1f01}', '\u{0}', '\u{0}']), + ('\u{1f0a}', ['\u{1f02}', '\u{0}', '\u{0}']), ('\u{1f0b}', ['\u{1f03}', '\u{0}', '\u{0}']), + ('\u{1f0c}', ['\u{1f04}', '\u{0}', '\u{0}']), ('\u{1f0d}', ['\u{1f05}', '\u{0}', '\u{0}']), + ('\u{1f0e}', ['\u{1f06}', '\u{0}', '\u{0}']), ('\u{1f0f}', ['\u{1f07}', '\u{0}', '\u{0}']), + ('\u{1f18}', ['\u{1f10}', '\u{0}', '\u{0}']), ('\u{1f19}', ['\u{1f11}', '\u{0}', '\u{0}']), + ('\u{1f1a}', ['\u{1f12}', '\u{0}', '\u{0}']), ('\u{1f1b}', ['\u{1f13}', '\u{0}', '\u{0}']), + ('\u{1f1c}', ['\u{1f14}', '\u{0}', '\u{0}']), ('\u{1f1d}', ['\u{1f15}', '\u{0}', '\u{0}']), + ('\u{1f28}', ['\u{1f20}', '\u{0}', '\u{0}']), ('\u{1f29}', ['\u{1f21}', '\u{0}', '\u{0}']), + ('\u{1f2a}', ['\u{1f22}', '\u{0}', '\u{0}']), ('\u{1f2b}', ['\u{1f23}', '\u{0}', '\u{0}']), + ('\u{1f2c}', ['\u{1f24}', '\u{0}', '\u{0}']), ('\u{1f2d}', ['\u{1f25}', '\u{0}', '\u{0}']), + ('\u{1f2e}', ['\u{1f26}', '\u{0}', '\u{0}']), ('\u{1f2f}', ['\u{1f27}', '\u{0}', '\u{0}']), + ('\u{1f38}', ['\u{1f30}', '\u{0}', '\u{0}']), ('\u{1f39}', ['\u{1f31}', '\u{0}', '\u{0}']), + ('\u{1f3a}', ['\u{1f32}', '\u{0}', '\u{0}']), ('\u{1f3b}', ['\u{1f33}', '\u{0}', '\u{0}']), + ('\u{1f3c}', ['\u{1f34}', '\u{0}', '\u{0}']), ('\u{1f3d}', ['\u{1f35}', '\u{0}', '\u{0}']), + ('\u{1f3e}', ['\u{1f36}', '\u{0}', '\u{0}']), ('\u{1f3f}', ['\u{1f37}', '\u{0}', '\u{0}']), + ('\u{1f48}', ['\u{1f40}', '\u{0}', '\u{0}']), ('\u{1f49}', ['\u{1f41}', '\u{0}', '\u{0}']), + ('\u{1f4a}', ['\u{1f42}', '\u{0}', '\u{0}']), ('\u{1f4b}', ['\u{1f43}', '\u{0}', '\u{0}']), + ('\u{1f4c}', ['\u{1f44}', '\u{0}', '\u{0}']), ('\u{1f4d}', ['\u{1f45}', '\u{0}', '\u{0}']), + ('\u{1f59}', ['\u{1f51}', '\u{0}', '\u{0}']), ('\u{1f5b}', ['\u{1f53}', '\u{0}', '\u{0}']), + ('\u{1f5d}', ['\u{1f55}', '\u{0}', '\u{0}']), ('\u{1f5f}', ['\u{1f57}', '\u{0}', '\u{0}']), + ('\u{1f68}', ['\u{1f60}', '\u{0}', 
'\u{0}']), ('\u{1f69}', ['\u{1f61}', '\u{0}', '\u{0}']), + ('\u{1f6a}', ['\u{1f62}', '\u{0}', '\u{0}']), ('\u{1f6b}', ['\u{1f63}', '\u{0}', '\u{0}']), + ('\u{1f6c}', ['\u{1f64}', '\u{0}', '\u{0}']), ('\u{1f6d}', ['\u{1f65}', '\u{0}', '\u{0}']), + ('\u{1f6e}', ['\u{1f66}', '\u{0}', '\u{0}']), ('\u{1f6f}', ['\u{1f67}', '\u{0}', '\u{0}']), + ('\u{1f88}', ['\u{1f80}', '\u{0}', '\u{0}']), ('\u{1f89}', ['\u{1f81}', '\u{0}', '\u{0}']), + ('\u{1f8a}', ['\u{1f82}', '\u{0}', '\u{0}']), ('\u{1f8b}', ['\u{1f83}', '\u{0}', '\u{0}']), + ('\u{1f8c}', ['\u{1f84}', '\u{0}', '\u{0}']), ('\u{1f8d}', ['\u{1f85}', '\u{0}', '\u{0}']), + ('\u{1f8e}', ['\u{1f86}', '\u{0}', '\u{0}']), ('\u{1f8f}', ['\u{1f87}', '\u{0}', '\u{0}']), + ('\u{1f98}', ['\u{1f90}', '\u{0}', '\u{0}']), ('\u{1f99}', ['\u{1f91}', '\u{0}', '\u{0}']), + ('\u{1f9a}', ['\u{1f92}', '\u{0}', '\u{0}']), ('\u{1f9b}', ['\u{1f93}', '\u{0}', '\u{0}']), + ('\u{1f9c}', ['\u{1f94}', '\u{0}', '\u{0}']), ('\u{1f9d}', ['\u{1f95}', '\u{0}', '\u{0}']), + ('\u{1f9e}', ['\u{1f96}', '\u{0}', '\u{0}']), ('\u{1f9f}', ['\u{1f97}', '\u{0}', '\u{0}']), + ('\u{1fa8}', ['\u{1fa0}', '\u{0}', '\u{0}']), ('\u{1fa9}', ['\u{1fa1}', '\u{0}', '\u{0}']), + ('\u{1faa}', ['\u{1fa2}', '\u{0}', '\u{0}']), ('\u{1fab}', ['\u{1fa3}', '\u{0}', '\u{0}']), + ('\u{1fac}', ['\u{1fa4}', '\u{0}', '\u{0}']), ('\u{1fad}', ['\u{1fa5}', '\u{0}', '\u{0}']), + ('\u{1fae}', ['\u{1fa6}', '\u{0}', '\u{0}']), ('\u{1faf}', ['\u{1fa7}', '\u{0}', '\u{0}']), + ('\u{1fb8}', ['\u{1fb0}', '\u{0}', '\u{0}']), ('\u{1fb9}', ['\u{1fb1}', '\u{0}', '\u{0}']), + ('\u{1fba}', ['\u{1f70}', '\u{0}', '\u{0}']), ('\u{1fbb}', ['\u{1f71}', '\u{0}', '\u{0}']), + ('\u{1fbc}', ['\u{1fb3}', '\u{0}', '\u{0}']), ('\u{1fc8}', ['\u{1f72}', '\u{0}', '\u{0}']), + ('\u{1fc9}', ['\u{1f73}', '\u{0}', '\u{0}']), ('\u{1fca}', ['\u{1f74}', '\u{0}', '\u{0}']), + ('\u{1fcb}', ['\u{1f75}', '\u{0}', '\u{0}']), ('\u{1fcc}', ['\u{1fc3}', '\u{0}', '\u{0}']), + ('\u{1fd8}', ['\u{1fd0}', '\u{0}', '\u{0}']), ('\u{1fd9}', ['\u{1fd1}', '\u{0}', '\u{0}']), + ('\u{1fda}', ['\u{1f76}', '\u{0}', '\u{0}']), ('\u{1fdb}', ['\u{1f77}', '\u{0}', '\u{0}']), + ('\u{1fe8}', ['\u{1fe0}', '\u{0}', '\u{0}']), ('\u{1fe9}', ['\u{1fe1}', '\u{0}', '\u{0}']), + ('\u{1fea}', ['\u{1f7a}', '\u{0}', '\u{0}']), ('\u{1feb}', ['\u{1f7b}', '\u{0}', '\u{0}']), + ('\u{1fec}', ['\u{1fe5}', '\u{0}', '\u{0}']), ('\u{1ff8}', ['\u{1f78}', '\u{0}', '\u{0}']), + ('\u{1ff9}', ['\u{1f79}', '\u{0}', '\u{0}']), ('\u{1ffa}', ['\u{1f7c}', '\u{0}', '\u{0}']), + ('\u{1ffb}', ['\u{1f7d}', '\u{0}', '\u{0}']), ('\u{1ffc}', ['\u{1ff3}', '\u{0}', '\u{0}']), + ('\u{2126}', ['\u{3c9}', '\u{0}', '\u{0}']), ('\u{212a}', ['\u{6b}', '\u{0}', '\u{0}']), + ('\u{212b}', ['\u{e5}', '\u{0}', '\u{0}']), ('\u{2132}', ['\u{214e}', '\u{0}', '\u{0}']), + ('\u{2160}', ['\u{2170}', '\u{0}', '\u{0}']), ('\u{2161}', ['\u{2171}', '\u{0}', '\u{0}']), + ('\u{2162}', ['\u{2172}', '\u{0}', '\u{0}']), ('\u{2163}', ['\u{2173}', '\u{0}', '\u{0}']), + ('\u{2164}', ['\u{2174}', '\u{0}', '\u{0}']), ('\u{2165}', ['\u{2175}', '\u{0}', '\u{0}']), + ('\u{2166}', ['\u{2176}', '\u{0}', '\u{0}']), ('\u{2167}', ['\u{2177}', '\u{0}', '\u{0}']), + ('\u{2168}', ['\u{2178}', '\u{0}', '\u{0}']), ('\u{2169}', ['\u{2179}', '\u{0}', '\u{0}']), + ('\u{216a}', ['\u{217a}', '\u{0}', '\u{0}']), ('\u{216b}', ['\u{217b}', '\u{0}', '\u{0}']), + ('\u{216c}', ['\u{217c}', '\u{0}', '\u{0}']), ('\u{216d}', ['\u{217d}', '\u{0}', '\u{0}']), + ('\u{216e}', ['\u{217e}', '\u{0}', '\u{0}']), ('\u{216f}', ['\u{217f}', '\u{0}', '\u{0}']), + ('\u{2183}', 
['\u{2184}', '\u{0}', '\u{0}']), ('\u{24b6}', ['\u{24d0}', '\u{0}', '\u{0}']), + ('\u{24b7}', ['\u{24d1}', '\u{0}', '\u{0}']), ('\u{24b8}', ['\u{24d2}', '\u{0}', '\u{0}']), + ('\u{24b9}', ['\u{24d3}', '\u{0}', '\u{0}']), ('\u{24ba}', ['\u{24d4}', '\u{0}', '\u{0}']), + ('\u{24bb}', ['\u{24d5}', '\u{0}', '\u{0}']), ('\u{24bc}', ['\u{24d6}', '\u{0}', '\u{0}']), + ('\u{24bd}', ['\u{24d7}', '\u{0}', '\u{0}']), ('\u{24be}', ['\u{24d8}', '\u{0}', '\u{0}']), + ('\u{24bf}', ['\u{24d9}', '\u{0}', '\u{0}']), ('\u{24c0}', ['\u{24da}', '\u{0}', '\u{0}']), + ('\u{24c1}', ['\u{24db}', '\u{0}', '\u{0}']), ('\u{24c2}', ['\u{24dc}', '\u{0}', '\u{0}']), + ('\u{24c3}', ['\u{24dd}', '\u{0}', '\u{0}']), ('\u{24c4}', ['\u{24de}', '\u{0}', '\u{0}']), + ('\u{24c5}', ['\u{24df}', '\u{0}', '\u{0}']), ('\u{24c6}', ['\u{24e0}', '\u{0}', '\u{0}']), + ('\u{24c7}', ['\u{24e1}', '\u{0}', '\u{0}']), ('\u{24c8}', ['\u{24e2}', '\u{0}', '\u{0}']), + ('\u{24c9}', ['\u{24e3}', '\u{0}', '\u{0}']), ('\u{24ca}', ['\u{24e4}', '\u{0}', '\u{0}']), + ('\u{24cb}', ['\u{24e5}', '\u{0}', '\u{0}']), ('\u{24cc}', ['\u{24e6}', '\u{0}', '\u{0}']), + ('\u{24cd}', ['\u{24e7}', '\u{0}', '\u{0}']), ('\u{24ce}', ['\u{24e8}', '\u{0}', '\u{0}']), + ('\u{24cf}', ['\u{24e9}', '\u{0}', '\u{0}']), ('\u{2c00}', ['\u{2c30}', '\u{0}', '\u{0}']), + ('\u{2c01}', ['\u{2c31}', '\u{0}', '\u{0}']), ('\u{2c02}', ['\u{2c32}', '\u{0}', '\u{0}']), + ('\u{2c03}', ['\u{2c33}', '\u{0}', '\u{0}']), ('\u{2c04}', ['\u{2c34}', '\u{0}', '\u{0}']), + ('\u{2c05}', ['\u{2c35}', '\u{0}', '\u{0}']), ('\u{2c06}', ['\u{2c36}', '\u{0}', '\u{0}']), + ('\u{2c07}', ['\u{2c37}', '\u{0}', '\u{0}']), ('\u{2c08}', ['\u{2c38}', '\u{0}', '\u{0}']), + ('\u{2c09}', ['\u{2c39}', '\u{0}', '\u{0}']), ('\u{2c0a}', ['\u{2c3a}', '\u{0}', '\u{0}']), + ('\u{2c0b}', ['\u{2c3b}', '\u{0}', '\u{0}']), ('\u{2c0c}', ['\u{2c3c}', '\u{0}', '\u{0}']), + ('\u{2c0d}', ['\u{2c3d}', '\u{0}', '\u{0}']), ('\u{2c0e}', ['\u{2c3e}', '\u{0}', '\u{0}']), + ('\u{2c0f}', ['\u{2c3f}', '\u{0}', '\u{0}']), ('\u{2c10}', ['\u{2c40}', '\u{0}', '\u{0}']), + ('\u{2c11}', ['\u{2c41}', '\u{0}', '\u{0}']), ('\u{2c12}', ['\u{2c42}', '\u{0}', '\u{0}']), + ('\u{2c13}', ['\u{2c43}', '\u{0}', '\u{0}']), ('\u{2c14}', ['\u{2c44}', '\u{0}', '\u{0}']), + ('\u{2c15}', ['\u{2c45}', '\u{0}', '\u{0}']), ('\u{2c16}', ['\u{2c46}', '\u{0}', '\u{0}']), + ('\u{2c17}', ['\u{2c47}', '\u{0}', '\u{0}']), ('\u{2c18}', ['\u{2c48}', '\u{0}', '\u{0}']), + ('\u{2c19}', ['\u{2c49}', '\u{0}', '\u{0}']), ('\u{2c1a}', ['\u{2c4a}', '\u{0}', '\u{0}']), + ('\u{2c1b}', ['\u{2c4b}', '\u{0}', '\u{0}']), ('\u{2c1c}', ['\u{2c4c}', '\u{0}', '\u{0}']), + ('\u{2c1d}', ['\u{2c4d}', '\u{0}', '\u{0}']), ('\u{2c1e}', ['\u{2c4e}', '\u{0}', '\u{0}']), + ('\u{2c1f}', ['\u{2c4f}', '\u{0}', '\u{0}']), ('\u{2c20}', ['\u{2c50}', '\u{0}', '\u{0}']), + ('\u{2c21}', ['\u{2c51}', '\u{0}', '\u{0}']), ('\u{2c22}', ['\u{2c52}', '\u{0}', '\u{0}']), + ('\u{2c23}', ['\u{2c53}', '\u{0}', '\u{0}']), ('\u{2c24}', ['\u{2c54}', '\u{0}', '\u{0}']), + ('\u{2c25}', ['\u{2c55}', '\u{0}', '\u{0}']), ('\u{2c26}', ['\u{2c56}', '\u{0}', '\u{0}']), + ('\u{2c27}', ['\u{2c57}', '\u{0}', '\u{0}']), ('\u{2c28}', ['\u{2c58}', '\u{0}', '\u{0}']), + ('\u{2c29}', ['\u{2c59}', '\u{0}', '\u{0}']), ('\u{2c2a}', ['\u{2c5a}', '\u{0}', '\u{0}']), + ('\u{2c2b}', ['\u{2c5b}', '\u{0}', '\u{0}']), ('\u{2c2c}', ['\u{2c5c}', '\u{0}', '\u{0}']), + ('\u{2c2d}', ['\u{2c5d}', '\u{0}', '\u{0}']), ('\u{2c2e}', ['\u{2c5e}', '\u{0}', '\u{0}']), + ('\u{2c2f}', ['\u{2c5f}', '\u{0}', '\u{0}']), ('\u{2c60}', ['\u{2c61}', '\u{0}', 
'\u{0}']), + ('\u{2c62}', ['\u{26b}', '\u{0}', '\u{0}']), ('\u{2c63}', ['\u{1d7d}', '\u{0}', '\u{0}']), + ('\u{2c64}', ['\u{27d}', '\u{0}', '\u{0}']), ('\u{2c67}', ['\u{2c68}', '\u{0}', '\u{0}']), + ('\u{2c69}', ['\u{2c6a}', '\u{0}', '\u{0}']), ('\u{2c6b}', ['\u{2c6c}', '\u{0}', '\u{0}']), + ('\u{2c6d}', ['\u{251}', '\u{0}', '\u{0}']), ('\u{2c6e}', ['\u{271}', '\u{0}', '\u{0}']), + ('\u{2c6f}', ['\u{250}', '\u{0}', '\u{0}']), ('\u{2c70}', ['\u{252}', '\u{0}', '\u{0}']), + ('\u{2c72}', ['\u{2c73}', '\u{0}', '\u{0}']), ('\u{2c75}', ['\u{2c76}', '\u{0}', '\u{0}']), + ('\u{2c7e}', ['\u{23f}', '\u{0}', '\u{0}']), ('\u{2c7f}', ['\u{240}', '\u{0}', '\u{0}']), + ('\u{2c80}', ['\u{2c81}', '\u{0}', '\u{0}']), ('\u{2c82}', ['\u{2c83}', '\u{0}', '\u{0}']), + ('\u{2c84}', ['\u{2c85}', '\u{0}', '\u{0}']), ('\u{2c86}', ['\u{2c87}', '\u{0}', '\u{0}']), + ('\u{2c88}', ['\u{2c89}', '\u{0}', '\u{0}']), ('\u{2c8a}', ['\u{2c8b}', '\u{0}', '\u{0}']), + ('\u{2c8c}', ['\u{2c8d}', '\u{0}', '\u{0}']), ('\u{2c8e}', ['\u{2c8f}', '\u{0}', '\u{0}']), + ('\u{2c90}', ['\u{2c91}', '\u{0}', '\u{0}']), ('\u{2c92}', ['\u{2c93}', '\u{0}', '\u{0}']), + ('\u{2c94}', ['\u{2c95}', '\u{0}', '\u{0}']), ('\u{2c96}', ['\u{2c97}', '\u{0}', '\u{0}']), + ('\u{2c98}', ['\u{2c99}', '\u{0}', '\u{0}']), ('\u{2c9a}', ['\u{2c9b}', '\u{0}', '\u{0}']), + ('\u{2c9c}', ['\u{2c9d}', '\u{0}', '\u{0}']), ('\u{2c9e}', ['\u{2c9f}', '\u{0}', '\u{0}']), + ('\u{2ca0}', ['\u{2ca1}', '\u{0}', '\u{0}']), ('\u{2ca2}', ['\u{2ca3}', '\u{0}', '\u{0}']), + ('\u{2ca4}', ['\u{2ca5}', '\u{0}', '\u{0}']), ('\u{2ca6}', ['\u{2ca7}', '\u{0}', '\u{0}']), + ('\u{2ca8}', ['\u{2ca9}', '\u{0}', '\u{0}']), ('\u{2caa}', ['\u{2cab}', '\u{0}', '\u{0}']), + ('\u{2cac}', ['\u{2cad}', '\u{0}', '\u{0}']), ('\u{2cae}', ['\u{2caf}', '\u{0}', '\u{0}']), + ('\u{2cb0}', ['\u{2cb1}', '\u{0}', '\u{0}']), ('\u{2cb2}', ['\u{2cb3}', '\u{0}', '\u{0}']), + ('\u{2cb4}', ['\u{2cb5}', '\u{0}', '\u{0}']), ('\u{2cb6}', ['\u{2cb7}', '\u{0}', '\u{0}']), + ('\u{2cb8}', ['\u{2cb9}', '\u{0}', '\u{0}']), ('\u{2cba}', ['\u{2cbb}', '\u{0}', '\u{0}']), + ('\u{2cbc}', ['\u{2cbd}', '\u{0}', '\u{0}']), ('\u{2cbe}', ['\u{2cbf}', '\u{0}', '\u{0}']), + ('\u{2cc0}', ['\u{2cc1}', '\u{0}', '\u{0}']), ('\u{2cc2}', ['\u{2cc3}', '\u{0}', '\u{0}']), + ('\u{2cc4}', ['\u{2cc5}', '\u{0}', '\u{0}']), ('\u{2cc6}', ['\u{2cc7}', '\u{0}', '\u{0}']), + ('\u{2cc8}', ['\u{2cc9}', '\u{0}', '\u{0}']), ('\u{2cca}', ['\u{2ccb}', '\u{0}', '\u{0}']), + ('\u{2ccc}', ['\u{2ccd}', '\u{0}', '\u{0}']), ('\u{2cce}', ['\u{2ccf}', '\u{0}', '\u{0}']), + ('\u{2cd0}', ['\u{2cd1}', '\u{0}', '\u{0}']), ('\u{2cd2}', ['\u{2cd3}', '\u{0}', '\u{0}']), + ('\u{2cd4}', ['\u{2cd5}', '\u{0}', '\u{0}']), ('\u{2cd6}', ['\u{2cd7}', '\u{0}', '\u{0}']), + ('\u{2cd8}', ['\u{2cd9}', '\u{0}', '\u{0}']), ('\u{2cda}', ['\u{2cdb}', '\u{0}', '\u{0}']), + ('\u{2cdc}', ['\u{2cdd}', '\u{0}', '\u{0}']), ('\u{2cde}', ['\u{2cdf}', '\u{0}', '\u{0}']), + ('\u{2ce0}', ['\u{2ce1}', '\u{0}', '\u{0}']), ('\u{2ce2}', ['\u{2ce3}', '\u{0}', '\u{0}']), + ('\u{2ceb}', ['\u{2cec}', '\u{0}', '\u{0}']), ('\u{2ced}', ['\u{2cee}', '\u{0}', '\u{0}']), + ('\u{2cf2}', ['\u{2cf3}', '\u{0}', '\u{0}']), ('\u{a640}', ['\u{a641}', '\u{0}', '\u{0}']), + ('\u{a642}', ['\u{a643}', '\u{0}', '\u{0}']), ('\u{a644}', ['\u{a645}', '\u{0}', '\u{0}']), + ('\u{a646}', ['\u{a647}', '\u{0}', '\u{0}']), ('\u{a648}', ['\u{a649}', '\u{0}', '\u{0}']), + ('\u{a64a}', ['\u{a64b}', '\u{0}', '\u{0}']), ('\u{a64c}', ['\u{a64d}', '\u{0}', '\u{0}']), + ('\u{a64e}', ['\u{a64f}', '\u{0}', '\u{0}']), ('\u{a650}', ['\u{a651}', 
'\u{0}', '\u{0}']), + ('\u{a652}', ['\u{a653}', '\u{0}', '\u{0}']), ('\u{a654}', ['\u{a655}', '\u{0}', '\u{0}']), + ('\u{a656}', ['\u{a657}', '\u{0}', '\u{0}']), ('\u{a658}', ['\u{a659}', '\u{0}', '\u{0}']), + ('\u{a65a}', ['\u{a65b}', '\u{0}', '\u{0}']), ('\u{a65c}', ['\u{a65d}', '\u{0}', '\u{0}']), + ('\u{a65e}', ['\u{a65f}', '\u{0}', '\u{0}']), ('\u{a660}', ['\u{a661}', '\u{0}', '\u{0}']), + ('\u{a662}', ['\u{a663}', '\u{0}', '\u{0}']), ('\u{a664}', ['\u{a665}', '\u{0}', '\u{0}']), + ('\u{a666}', ['\u{a667}', '\u{0}', '\u{0}']), ('\u{a668}', ['\u{a669}', '\u{0}', '\u{0}']), + ('\u{a66a}', ['\u{a66b}', '\u{0}', '\u{0}']), ('\u{a66c}', ['\u{a66d}', '\u{0}', '\u{0}']), + ('\u{a680}', ['\u{a681}', '\u{0}', '\u{0}']), ('\u{a682}', ['\u{a683}', '\u{0}', '\u{0}']), + ('\u{a684}', ['\u{a685}', '\u{0}', '\u{0}']), ('\u{a686}', ['\u{a687}', '\u{0}', '\u{0}']), + ('\u{a688}', ['\u{a689}', '\u{0}', '\u{0}']), ('\u{a68a}', ['\u{a68b}', '\u{0}', '\u{0}']), + ('\u{a68c}', ['\u{a68d}', '\u{0}', '\u{0}']), ('\u{a68e}', ['\u{a68f}', '\u{0}', '\u{0}']), + ('\u{a690}', ['\u{a691}', '\u{0}', '\u{0}']), ('\u{a692}', ['\u{a693}', '\u{0}', '\u{0}']), + ('\u{a694}', ['\u{a695}', '\u{0}', '\u{0}']), ('\u{a696}', ['\u{a697}', '\u{0}', '\u{0}']), + ('\u{a698}', ['\u{a699}', '\u{0}', '\u{0}']), ('\u{a69a}', ['\u{a69b}', '\u{0}', '\u{0}']), + ('\u{a722}', ['\u{a723}', '\u{0}', '\u{0}']), ('\u{a724}', ['\u{a725}', '\u{0}', '\u{0}']), + ('\u{a726}', ['\u{a727}', '\u{0}', '\u{0}']), ('\u{a728}', ['\u{a729}', '\u{0}', '\u{0}']), + ('\u{a72a}', ['\u{a72b}', '\u{0}', '\u{0}']), ('\u{a72c}', ['\u{a72d}', '\u{0}', '\u{0}']), + ('\u{a72e}', ['\u{a72f}', '\u{0}', '\u{0}']), ('\u{a732}', ['\u{a733}', '\u{0}', '\u{0}']), + ('\u{a734}', ['\u{a735}', '\u{0}', '\u{0}']), ('\u{a736}', ['\u{a737}', '\u{0}', '\u{0}']), + ('\u{a738}', ['\u{a739}', '\u{0}', '\u{0}']), ('\u{a73a}', ['\u{a73b}', '\u{0}', '\u{0}']), + ('\u{a73c}', ['\u{a73d}', '\u{0}', '\u{0}']), ('\u{a73e}', ['\u{a73f}', '\u{0}', '\u{0}']), + ('\u{a740}', ['\u{a741}', '\u{0}', '\u{0}']), ('\u{a742}', ['\u{a743}', '\u{0}', '\u{0}']), + ('\u{a744}', ['\u{a745}', '\u{0}', '\u{0}']), ('\u{a746}', ['\u{a747}', '\u{0}', '\u{0}']), + ('\u{a748}', ['\u{a749}', '\u{0}', '\u{0}']), ('\u{a74a}', ['\u{a74b}', '\u{0}', '\u{0}']), + ('\u{a74c}', ['\u{a74d}', '\u{0}', '\u{0}']), ('\u{a74e}', ['\u{a74f}', '\u{0}', '\u{0}']), + ('\u{a750}', ['\u{a751}', '\u{0}', '\u{0}']), ('\u{a752}', ['\u{a753}', '\u{0}', '\u{0}']), + ('\u{a754}', ['\u{a755}', '\u{0}', '\u{0}']), ('\u{a756}', ['\u{a757}', '\u{0}', '\u{0}']), + ('\u{a758}', ['\u{a759}', '\u{0}', '\u{0}']), ('\u{a75a}', ['\u{a75b}', '\u{0}', '\u{0}']), + ('\u{a75c}', ['\u{a75d}', '\u{0}', '\u{0}']), ('\u{a75e}', ['\u{a75f}', '\u{0}', '\u{0}']), + ('\u{a760}', ['\u{a761}', '\u{0}', '\u{0}']), ('\u{a762}', ['\u{a763}', '\u{0}', '\u{0}']), + ('\u{a764}', ['\u{a765}', '\u{0}', '\u{0}']), ('\u{a766}', ['\u{a767}', '\u{0}', '\u{0}']), + ('\u{a768}', ['\u{a769}', '\u{0}', '\u{0}']), ('\u{a76a}', ['\u{a76b}', '\u{0}', '\u{0}']), + ('\u{a76c}', ['\u{a76d}', '\u{0}', '\u{0}']), ('\u{a76e}', ['\u{a76f}', '\u{0}', '\u{0}']), + ('\u{a779}', ['\u{a77a}', '\u{0}', '\u{0}']), ('\u{a77b}', ['\u{a77c}', '\u{0}', '\u{0}']), + ('\u{a77d}', ['\u{1d79}', '\u{0}', '\u{0}']), ('\u{a77e}', ['\u{a77f}', '\u{0}', '\u{0}']), + ('\u{a780}', ['\u{a781}', '\u{0}', '\u{0}']), ('\u{a782}', ['\u{a783}', '\u{0}', '\u{0}']), + ('\u{a784}', ['\u{a785}', '\u{0}', '\u{0}']), ('\u{a786}', ['\u{a787}', '\u{0}', '\u{0}']), + ('\u{a78b}', ['\u{a78c}', '\u{0}', '\u{0}']), 
('\u{a78d}', ['\u{265}', '\u{0}', '\u{0}']), + ('\u{a790}', ['\u{a791}', '\u{0}', '\u{0}']), ('\u{a792}', ['\u{a793}', '\u{0}', '\u{0}']), + ('\u{a796}', ['\u{a797}', '\u{0}', '\u{0}']), ('\u{a798}', ['\u{a799}', '\u{0}', '\u{0}']), + ('\u{a79a}', ['\u{a79b}', '\u{0}', '\u{0}']), ('\u{a79c}', ['\u{a79d}', '\u{0}', '\u{0}']), + ('\u{a79e}', ['\u{a79f}', '\u{0}', '\u{0}']), ('\u{a7a0}', ['\u{a7a1}', '\u{0}', '\u{0}']), + ('\u{a7a2}', ['\u{a7a3}', '\u{0}', '\u{0}']), ('\u{a7a4}', ['\u{a7a5}', '\u{0}', '\u{0}']), + ('\u{a7a6}', ['\u{a7a7}', '\u{0}', '\u{0}']), ('\u{a7a8}', ['\u{a7a9}', '\u{0}', '\u{0}']), + ('\u{a7aa}', ['\u{266}', '\u{0}', '\u{0}']), ('\u{a7ab}', ['\u{25c}', '\u{0}', '\u{0}']), + ('\u{a7ac}', ['\u{261}', '\u{0}', '\u{0}']), ('\u{a7ad}', ['\u{26c}', '\u{0}', '\u{0}']), + ('\u{a7ae}', ['\u{26a}', '\u{0}', '\u{0}']), ('\u{a7b0}', ['\u{29e}', '\u{0}', '\u{0}']), + ('\u{a7b1}', ['\u{287}', '\u{0}', '\u{0}']), ('\u{a7b2}', ['\u{29d}', '\u{0}', '\u{0}']), + ('\u{a7b3}', ['\u{ab53}', '\u{0}', '\u{0}']), ('\u{a7b4}', ['\u{a7b5}', '\u{0}', '\u{0}']), + ('\u{a7b6}', ['\u{a7b7}', '\u{0}', '\u{0}']), ('\u{a7b8}', ['\u{a7b9}', '\u{0}', '\u{0}']), + ('\u{a7ba}', ['\u{a7bb}', '\u{0}', '\u{0}']), ('\u{a7bc}', ['\u{a7bd}', '\u{0}', '\u{0}']), + ('\u{a7be}', ['\u{a7bf}', '\u{0}', '\u{0}']), ('\u{a7c0}', ['\u{a7c1}', '\u{0}', '\u{0}']), + ('\u{a7c2}', ['\u{a7c3}', '\u{0}', '\u{0}']), ('\u{a7c4}', ['\u{a794}', '\u{0}', '\u{0}']), + ('\u{a7c5}', ['\u{282}', '\u{0}', '\u{0}']), ('\u{a7c6}', ['\u{1d8e}', '\u{0}', '\u{0}']), + ('\u{a7c7}', ['\u{a7c8}', '\u{0}', '\u{0}']), ('\u{a7c9}', ['\u{a7ca}', '\u{0}', '\u{0}']), + ('\u{a7cb}', ['\u{264}', '\u{0}', '\u{0}']), ('\u{a7cc}', ['\u{a7cd}', '\u{0}', '\u{0}']), + ('\u{a7ce}', ['\u{a7cf}', '\u{0}', '\u{0}']), ('\u{a7d0}', ['\u{a7d1}', '\u{0}', '\u{0}']), + ('\u{a7d2}', ['\u{a7d3}', '\u{0}', '\u{0}']), ('\u{a7d4}', ['\u{a7d5}', '\u{0}', '\u{0}']), + ('\u{a7d6}', ['\u{a7d7}', '\u{0}', '\u{0}']), ('\u{a7d8}', ['\u{a7d9}', '\u{0}', '\u{0}']), + ('\u{a7da}', ['\u{a7db}', '\u{0}', '\u{0}']), ('\u{a7dc}', ['\u{19b}', '\u{0}', '\u{0}']), + ('\u{a7f5}', ['\u{a7f6}', '\u{0}', '\u{0}']), ('\u{ff21}', ['\u{ff41}', '\u{0}', '\u{0}']), + ('\u{ff22}', ['\u{ff42}', '\u{0}', '\u{0}']), ('\u{ff23}', ['\u{ff43}', '\u{0}', '\u{0}']), + ('\u{ff24}', ['\u{ff44}', '\u{0}', '\u{0}']), ('\u{ff25}', ['\u{ff45}', '\u{0}', '\u{0}']), + ('\u{ff26}', ['\u{ff46}', '\u{0}', '\u{0}']), ('\u{ff27}', ['\u{ff47}', '\u{0}', '\u{0}']), + ('\u{ff28}', ['\u{ff48}', '\u{0}', '\u{0}']), ('\u{ff29}', ['\u{ff49}', '\u{0}', '\u{0}']), + ('\u{ff2a}', ['\u{ff4a}', '\u{0}', '\u{0}']), ('\u{ff2b}', ['\u{ff4b}', '\u{0}', '\u{0}']), + ('\u{ff2c}', ['\u{ff4c}', '\u{0}', '\u{0}']), ('\u{ff2d}', ['\u{ff4d}', '\u{0}', '\u{0}']), + ('\u{ff2e}', ['\u{ff4e}', '\u{0}', '\u{0}']), ('\u{ff2f}', ['\u{ff4f}', '\u{0}', '\u{0}']), + ('\u{ff30}', ['\u{ff50}', '\u{0}', '\u{0}']), ('\u{ff31}', ['\u{ff51}', '\u{0}', '\u{0}']), + ('\u{ff32}', ['\u{ff52}', '\u{0}', '\u{0}']), ('\u{ff33}', ['\u{ff53}', '\u{0}', '\u{0}']), + ('\u{ff34}', ['\u{ff54}', '\u{0}', '\u{0}']), ('\u{ff35}', ['\u{ff55}', '\u{0}', '\u{0}']), + ('\u{ff36}', ['\u{ff56}', '\u{0}', '\u{0}']), ('\u{ff37}', ['\u{ff57}', '\u{0}', '\u{0}']), + ('\u{ff38}', ['\u{ff58}', '\u{0}', '\u{0}']), ('\u{ff39}', ['\u{ff59}', '\u{0}', '\u{0}']), + ('\u{ff3a}', ['\u{ff5a}', '\u{0}', '\u{0}']), + ('\u{10400}', ['\u{10428}', '\u{0}', '\u{0}']), + ('\u{10401}', ['\u{10429}', '\u{0}', '\u{0}']), + ('\u{10402}', ['\u{1042a}', '\u{0}', '\u{0}']), + ('\u{10403}', ['\u{1042b}', 
'\u{0}', '\u{0}']), + ('\u{10404}', ['\u{1042c}', '\u{0}', '\u{0}']), + ('\u{10405}', ['\u{1042d}', '\u{0}', '\u{0}']), + ('\u{10406}', ['\u{1042e}', '\u{0}', '\u{0}']), + ('\u{10407}', ['\u{1042f}', '\u{0}', '\u{0}']), + ('\u{10408}', ['\u{10430}', '\u{0}', '\u{0}']), + ('\u{10409}', ['\u{10431}', '\u{0}', '\u{0}']), + ('\u{1040a}', ['\u{10432}', '\u{0}', '\u{0}']), + ('\u{1040b}', ['\u{10433}', '\u{0}', '\u{0}']), + ('\u{1040c}', ['\u{10434}', '\u{0}', '\u{0}']), + ('\u{1040d}', ['\u{10435}', '\u{0}', '\u{0}']), + ('\u{1040e}', ['\u{10436}', '\u{0}', '\u{0}']), + ('\u{1040f}', ['\u{10437}', '\u{0}', '\u{0}']), + ('\u{10410}', ['\u{10438}', '\u{0}', '\u{0}']), + ('\u{10411}', ['\u{10439}', '\u{0}', '\u{0}']), + ('\u{10412}', ['\u{1043a}', '\u{0}', '\u{0}']), + ('\u{10413}', ['\u{1043b}', '\u{0}', '\u{0}']), + ('\u{10414}', ['\u{1043c}', '\u{0}', '\u{0}']), + ('\u{10415}', ['\u{1043d}', '\u{0}', '\u{0}']), + ('\u{10416}', ['\u{1043e}', '\u{0}', '\u{0}']), + ('\u{10417}', ['\u{1043f}', '\u{0}', '\u{0}']), + ('\u{10418}', ['\u{10440}', '\u{0}', '\u{0}']), + ('\u{10419}', ['\u{10441}', '\u{0}', '\u{0}']), + ('\u{1041a}', ['\u{10442}', '\u{0}', '\u{0}']), + ('\u{1041b}', ['\u{10443}', '\u{0}', '\u{0}']), + ('\u{1041c}', ['\u{10444}', '\u{0}', '\u{0}']), + ('\u{1041d}', ['\u{10445}', '\u{0}', '\u{0}']), + ('\u{1041e}', ['\u{10446}', '\u{0}', '\u{0}']), + ('\u{1041f}', ['\u{10447}', '\u{0}', '\u{0}']), + ('\u{10420}', ['\u{10448}', '\u{0}', '\u{0}']), + ('\u{10421}', ['\u{10449}', '\u{0}', '\u{0}']), + ('\u{10422}', ['\u{1044a}', '\u{0}', '\u{0}']), + ('\u{10423}', ['\u{1044b}', '\u{0}', '\u{0}']), + ('\u{10424}', ['\u{1044c}', '\u{0}', '\u{0}']), + ('\u{10425}', ['\u{1044d}', '\u{0}', '\u{0}']), + ('\u{10426}', ['\u{1044e}', '\u{0}', '\u{0}']), + ('\u{10427}', ['\u{1044f}', '\u{0}', '\u{0}']), + ('\u{104b0}', ['\u{104d8}', '\u{0}', '\u{0}']), + ('\u{104b1}', ['\u{104d9}', '\u{0}', '\u{0}']), + ('\u{104b2}', ['\u{104da}', '\u{0}', '\u{0}']), + ('\u{104b3}', ['\u{104db}', '\u{0}', '\u{0}']), + ('\u{104b4}', ['\u{104dc}', '\u{0}', '\u{0}']), + ('\u{104b5}', ['\u{104dd}', '\u{0}', '\u{0}']), + ('\u{104b6}', ['\u{104de}', '\u{0}', '\u{0}']), + ('\u{104b7}', ['\u{104df}', '\u{0}', '\u{0}']), + ('\u{104b8}', ['\u{104e0}', '\u{0}', '\u{0}']), + ('\u{104b9}', ['\u{104e1}', '\u{0}', '\u{0}']), + ('\u{104ba}', ['\u{104e2}', '\u{0}', '\u{0}']), + ('\u{104bb}', ['\u{104e3}', '\u{0}', '\u{0}']), + ('\u{104bc}', ['\u{104e4}', '\u{0}', '\u{0}']), + ('\u{104bd}', ['\u{104e5}', '\u{0}', '\u{0}']), + ('\u{104be}', ['\u{104e6}', '\u{0}', '\u{0}']), + ('\u{104bf}', ['\u{104e7}', '\u{0}', '\u{0}']), + ('\u{104c0}', ['\u{104e8}', '\u{0}', '\u{0}']), + ('\u{104c1}', ['\u{104e9}', '\u{0}', '\u{0}']), + ('\u{104c2}', ['\u{104ea}', '\u{0}', '\u{0}']), + ('\u{104c3}', ['\u{104eb}', '\u{0}', '\u{0}']), + ('\u{104c4}', ['\u{104ec}', '\u{0}', '\u{0}']), + ('\u{104c5}', ['\u{104ed}', '\u{0}', '\u{0}']), + ('\u{104c6}', ['\u{104ee}', '\u{0}', '\u{0}']), + ('\u{104c7}', ['\u{104ef}', '\u{0}', '\u{0}']), + ('\u{104c8}', ['\u{104f0}', '\u{0}', '\u{0}']), + ('\u{104c9}', ['\u{104f1}', '\u{0}', '\u{0}']), + ('\u{104ca}', ['\u{104f2}', '\u{0}', '\u{0}']), + ('\u{104cb}', ['\u{104f3}', '\u{0}', '\u{0}']), + ('\u{104cc}', ['\u{104f4}', '\u{0}', '\u{0}']), + ('\u{104cd}', ['\u{104f5}', '\u{0}', '\u{0}']), + ('\u{104ce}', ['\u{104f6}', '\u{0}', '\u{0}']), + ('\u{104cf}', ['\u{104f7}', '\u{0}', '\u{0}']), + ('\u{104d0}', ['\u{104f8}', '\u{0}', '\u{0}']), + ('\u{104d1}', ['\u{104f9}', '\u{0}', '\u{0}']), + ('\u{104d2}', ['\u{104fa}', 
'\u{0}', '\u{0}']), + ('\u{104d3}', ['\u{104fb}', '\u{0}', '\u{0}']), + ('\u{10570}', ['\u{10597}', '\u{0}', '\u{0}']), + ('\u{10571}', ['\u{10598}', '\u{0}', '\u{0}']), + ('\u{10572}', ['\u{10599}', '\u{0}', '\u{0}']), + ('\u{10573}', ['\u{1059a}', '\u{0}', '\u{0}']), + ('\u{10574}', ['\u{1059b}', '\u{0}', '\u{0}']), + ('\u{10575}', ['\u{1059c}', '\u{0}', '\u{0}']), + ('\u{10576}', ['\u{1059d}', '\u{0}', '\u{0}']), + ('\u{10577}', ['\u{1059e}', '\u{0}', '\u{0}']), + ('\u{10578}', ['\u{1059f}', '\u{0}', '\u{0}']), + ('\u{10579}', ['\u{105a0}', '\u{0}', '\u{0}']), + ('\u{1057a}', ['\u{105a1}', '\u{0}', '\u{0}']), + ('\u{1057c}', ['\u{105a3}', '\u{0}', '\u{0}']), + ('\u{1057d}', ['\u{105a4}', '\u{0}', '\u{0}']), + ('\u{1057e}', ['\u{105a5}', '\u{0}', '\u{0}']), + ('\u{1057f}', ['\u{105a6}', '\u{0}', '\u{0}']), + ('\u{10580}', ['\u{105a7}', '\u{0}', '\u{0}']), + ('\u{10581}', ['\u{105a8}', '\u{0}', '\u{0}']), + ('\u{10582}', ['\u{105a9}', '\u{0}', '\u{0}']), + ('\u{10583}', ['\u{105aa}', '\u{0}', '\u{0}']), + ('\u{10584}', ['\u{105ab}', '\u{0}', '\u{0}']), + ('\u{10585}', ['\u{105ac}', '\u{0}', '\u{0}']), + ('\u{10586}', ['\u{105ad}', '\u{0}', '\u{0}']), + ('\u{10587}', ['\u{105ae}', '\u{0}', '\u{0}']), + ('\u{10588}', ['\u{105af}', '\u{0}', '\u{0}']), + ('\u{10589}', ['\u{105b0}', '\u{0}', '\u{0}']), + ('\u{1058a}', ['\u{105b1}', '\u{0}', '\u{0}']), + ('\u{1058c}', ['\u{105b3}', '\u{0}', '\u{0}']), + ('\u{1058d}', ['\u{105b4}', '\u{0}', '\u{0}']), + ('\u{1058e}', ['\u{105b5}', '\u{0}', '\u{0}']), + ('\u{1058f}', ['\u{105b6}', '\u{0}', '\u{0}']), + ('\u{10590}', ['\u{105b7}', '\u{0}', '\u{0}']), + ('\u{10591}', ['\u{105b8}', '\u{0}', '\u{0}']), + ('\u{10592}', ['\u{105b9}', '\u{0}', '\u{0}']), + ('\u{10594}', ['\u{105bb}', '\u{0}', '\u{0}']), + ('\u{10595}', ['\u{105bc}', '\u{0}', '\u{0}']), + ('\u{10c80}', ['\u{10cc0}', '\u{0}', '\u{0}']), + ('\u{10c81}', ['\u{10cc1}', '\u{0}', '\u{0}']), + ('\u{10c82}', ['\u{10cc2}', '\u{0}', '\u{0}']), + ('\u{10c83}', ['\u{10cc3}', '\u{0}', '\u{0}']), + ('\u{10c84}', ['\u{10cc4}', '\u{0}', '\u{0}']), + ('\u{10c85}', ['\u{10cc5}', '\u{0}', '\u{0}']), + ('\u{10c86}', ['\u{10cc6}', '\u{0}', '\u{0}']), + ('\u{10c87}', ['\u{10cc7}', '\u{0}', '\u{0}']), + ('\u{10c88}', ['\u{10cc8}', '\u{0}', '\u{0}']), + ('\u{10c89}', ['\u{10cc9}', '\u{0}', '\u{0}']), + ('\u{10c8a}', ['\u{10cca}', '\u{0}', '\u{0}']), + ('\u{10c8b}', ['\u{10ccb}', '\u{0}', '\u{0}']), + ('\u{10c8c}', ['\u{10ccc}', '\u{0}', '\u{0}']), + ('\u{10c8d}', ['\u{10ccd}', '\u{0}', '\u{0}']), + ('\u{10c8e}', ['\u{10cce}', '\u{0}', '\u{0}']), + ('\u{10c8f}', ['\u{10ccf}', '\u{0}', '\u{0}']), + ('\u{10c90}', ['\u{10cd0}', '\u{0}', '\u{0}']), + ('\u{10c91}', ['\u{10cd1}', '\u{0}', '\u{0}']), + ('\u{10c92}', ['\u{10cd2}', '\u{0}', '\u{0}']), + ('\u{10c93}', ['\u{10cd3}', '\u{0}', '\u{0}']), + ('\u{10c94}', ['\u{10cd4}', '\u{0}', '\u{0}']), + ('\u{10c95}', ['\u{10cd5}', '\u{0}', '\u{0}']), + ('\u{10c96}', ['\u{10cd6}', '\u{0}', '\u{0}']), + ('\u{10c97}', ['\u{10cd7}', '\u{0}', '\u{0}']), + ('\u{10c98}', ['\u{10cd8}', '\u{0}', '\u{0}']), + ('\u{10c99}', ['\u{10cd9}', '\u{0}', '\u{0}']), + ('\u{10c9a}', ['\u{10cda}', '\u{0}', '\u{0}']), + ('\u{10c9b}', ['\u{10cdb}', '\u{0}', '\u{0}']), + ('\u{10c9c}', ['\u{10cdc}', '\u{0}', '\u{0}']), + ('\u{10c9d}', ['\u{10cdd}', '\u{0}', '\u{0}']), + ('\u{10c9e}', ['\u{10cde}', '\u{0}', '\u{0}']), + ('\u{10c9f}', ['\u{10cdf}', '\u{0}', '\u{0}']), + ('\u{10ca0}', ['\u{10ce0}', '\u{0}', '\u{0}']), + ('\u{10ca1}', ['\u{10ce1}', '\u{0}', '\u{0}']), + ('\u{10ca2}', ['\u{10ce2}', 
'\u{0}', '\u{0}']), + ('\u{10ca3}', ['\u{10ce3}', '\u{0}', '\u{0}']), + ('\u{10ca4}', ['\u{10ce4}', '\u{0}', '\u{0}']), + ('\u{10ca5}', ['\u{10ce5}', '\u{0}', '\u{0}']), + ('\u{10ca6}', ['\u{10ce6}', '\u{0}', '\u{0}']), + ('\u{10ca7}', ['\u{10ce7}', '\u{0}', '\u{0}']), + ('\u{10ca8}', ['\u{10ce8}', '\u{0}', '\u{0}']), + ('\u{10ca9}', ['\u{10ce9}', '\u{0}', '\u{0}']), + ('\u{10caa}', ['\u{10cea}', '\u{0}', '\u{0}']), + ('\u{10cab}', ['\u{10ceb}', '\u{0}', '\u{0}']), + ('\u{10cac}', ['\u{10cec}', '\u{0}', '\u{0}']), + ('\u{10cad}', ['\u{10ced}', '\u{0}', '\u{0}']), + ('\u{10cae}', ['\u{10cee}', '\u{0}', '\u{0}']), + ('\u{10caf}', ['\u{10cef}', '\u{0}', '\u{0}']), + ('\u{10cb0}', ['\u{10cf0}', '\u{0}', '\u{0}']), + ('\u{10cb1}', ['\u{10cf1}', '\u{0}', '\u{0}']), + ('\u{10cb2}', ['\u{10cf2}', '\u{0}', '\u{0}']), + ('\u{10d50}', ['\u{10d70}', '\u{0}', '\u{0}']), + ('\u{10d51}', ['\u{10d71}', '\u{0}', '\u{0}']), + ('\u{10d52}', ['\u{10d72}', '\u{0}', '\u{0}']), + ('\u{10d53}', ['\u{10d73}', '\u{0}', '\u{0}']), + ('\u{10d54}', ['\u{10d74}', '\u{0}', '\u{0}']), + ('\u{10d55}', ['\u{10d75}', '\u{0}', '\u{0}']), + ('\u{10d56}', ['\u{10d76}', '\u{0}', '\u{0}']), + ('\u{10d57}', ['\u{10d77}', '\u{0}', '\u{0}']), + ('\u{10d58}', ['\u{10d78}', '\u{0}', '\u{0}']), + ('\u{10d59}', ['\u{10d79}', '\u{0}', '\u{0}']), + ('\u{10d5a}', ['\u{10d7a}', '\u{0}', '\u{0}']), + ('\u{10d5b}', ['\u{10d7b}', '\u{0}', '\u{0}']), + ('\u{10d5c}', ['\u{10d7c}', '\u{0}', '\u{0}']), + ('\u{10d5d}', ['\u{10d7d}', '\u{0}', '\u{0}']), + ('\u{10d5e}', ['\u{10d7e}', '\u{0}', '\u{0}']), + ('\u{10d5f}', ['\u{10d7f}', '\u{0}', '\u{0}']), + ('\u{10d60}', ['\u{10d80}', '\u{0}', '\u{0}']), + ('\u{10d61}', ['\u{10d81}', '\u{0}', '\u{0}']), + ('\u{10d62}', ['\u{10d82}', '\u{0}', '\u{0}']), + ('\u{10d63}', ['\u{10d83}', '\u{0}', '\u{0}']), + ('\u{10d64}', ['\u{10d84}', '\u{0}', '\u{0}']), + ('\u{10d65}', ['\u{10d85}', '\u{0}', '\u{0}']), + ('\u{118a0}', ['\u{118c0}', '\u{0}', '\u{0}']), + ('\u{118a1}', ['\u{118c1}', '\u{0}', '\u{0}']), + ('\u{118a2}', ['\u{118c2}', '\u{0}', '\u{0}']), + ('\u{118a3}', ['\u{118c3}', '\u{0}', '\u{0}']), + ('\u{118a4}', ['\u{118c4}', '\u{0}', '\u{0}']), + ('\u{118a5}', ['\u{118c5}', '\u{0}', '\u{0}']), + ('\u{118a6}', ['\u{118c6}', '\u{0}', '\u{0}']), + ('\u{118a7}', ['\u{118c7}', '\u{0}', '\u{0}']), + ('\u{118a8}', ['\u{118c8}', '\u{0}', '\u{0}']), + ('\u{118a9}', ['\u{118c9}', '\u{0}', '\u{0}']), + ('\u{118aa}', ['\u{118ca}', '\u{0}', '\u{0}']), + ('\u{118ab}', ['\u{118cb}', '\u{0}', '\u{0}']), + ('\u{118ac}', ['\u{118cc}', '\u{0}', '\u{0}']), + ('\u{118ad}', ['\u{118cd}', '\u{0}', '\u{0}']), + ('\u{118ae}', ['\u{118ce}', '\u{0}', '\u{0}']), + ('\u{118af}', ['\u{118cf}', '\u{0}', '\u{0}']), + ('\u{118b0}', ['\u{118d0}', '\u{0}', '\u{0}']), + ('\u{118b1}', ['\u{118d1}', '\u{0}', '\u{0}']), + ('\u{118b2}', ['\u{118d2}', '\u{0}', '\u{0}']), + ('\u{118b3}', ['\u{118d3}', '\u{0}', '\u{0}']), + ('\u{118b4}', ['\u{118d4}', '\u{0}', '\u{0}']), + ('\u{118b5}', ['\u{118d5}', '\u{0}', '\u{0}']), + ('\u{118b6}', ['\u{118d6}', '\u{0}', '\u{0}']), + ('\u{118b7}', ['\u{118d7}', '\u{0}', '\u{0}']), + ('\u{118b8}', ['\u{118d8}', '\u{0}', '\u{0}']), + ('\u{118b9}', ['\u{118d9}', '\u{0}', '\u{0}']), + ('\u{118ba}', ['\u{118da}', '\u{0}', '\u{0}']), + ('\u{118bb}', ['\u{118db}', '\u{0}', '\u{0}']), + ('\u{118bc}', ['\u{118dc}', '\u{0}', '\u{0}']), + ('\u{118bd}', ['\u{118dd}', '\u{0}', '\u{0}']), + ('\u{118be}', ['\u{118de}', '\u{0}', '\u{0}']), + ('\u{118bf}', ['\u{118df}', '\u{0}', '\u{0}']), + ('\u{16e40}', ['\u{16e60}', 
'\u{0}', '\u{0}']), + ('\u{16e41}', ['\u{16e61}', '\u{0}', '\u{0}']), + ('\u{16e42}', ['\u{16e62}', '\u{0}', '\u{0}']), + ('\u{16e43}', ['\u{16e63}', '\u{0}', '\u{0}']), + ('\u{16e44}', ['\u{16e64}', '\u{0}', '\u{0}']), + ('\u{16e45}', ['\u{16e65}', '\u{0}', '\u{0}']), + ('\u{16e46}', ['\u{16e66}', '\u{0}', '\u{0}']), + ('\u{16e47}', ['\u{16e67}', '\u{0}', '\u{0}']), + ('\u{16e48}', ['\u{16e68}', '\u{0}', '\u{0}']), + ('\u{16e49}', ['\u{16e69}', '\u{0}', '\u{0}']), + ('\u{16e4a}', ['\u{16e6a}', '\u{0}', '\u{0}']), + ('\u{16e4b}', ['\u{16e6b}', '\u{0}', '\u{0}']), + ('\u{16e4c}', ['\u{16e6c}', '\u{0}', '\u{0}']), + ('\u{16e4d}', ['\u{16e6d}', '\u{0}', '\u{0}']), + ('\u{16e4e}', ['\u{16e6e}', '\u{0}', '\u{0}']), + ('\u{16e4f}', ['\u{16e6f}', '\u{0}', '\u{0}']), + ('\u{16e50}', ['\u{16e70}', '\u{0}', '\u{0}']), + ('\u{16e51}', ['\u{16e71}', '\u{0}', '\u{0}']), + ('\u{16e52}', ['\u{16e72}', '\u{0}', '\u{0}']), + ('\u{16e53}', ['\u{16e73}', '\u{0}', '\u{0}']), + ('\u{16e54}', ['\u{16e74}', '\u{0}', '\u{0}']), + ('\u{16e55}', ['\u{16e75}', '\u{0}', '\u{0}']), + ('\u{16e56}', ['\u{16e76}', '\u{0}', '\u{0}']), + ('\u{16e57}', ['\u{16e77}', '\u{0}', '\u{0}']), + ('\u{16e58}', ['\u{16e78}', '\u{0}', '\u{0}']), + ('\u{16e59}', ['\u{16e79}', '\u{0}', '\u{0}']), + ('\u{16e5a}', ['\u{16e7a}', '\u{0}', '\u{0}']), + ('\u{16e5b}', ['\u{16e7b}', '\u{0}', '\u{0}']), + ('\u{16e5c}', ['\u{16e7c}', '\u{0}', '\u{0}']), + ('\u{16e5d}', ['\u{16e7d}', '\u{0}', '\u{0}']), + ('\u{16e5e}', ['\u{16e7e}', '\u{0}', '\u{0}']), + ('\u{16e5f}', ['\u{16e7f}', '\u{0}', '\u{0}']), + ('\u{16ea0}', ['\u{16ebb}', '\u{0}', '\u{0}']), + ('\u{16ea1}', ['\u{16ebc}', '\u{0}', '\u{0}']), + ('\u{16ea2}', ['\u{16ebd}', '\u{0}', '\u{0}']), + ('\u{16ea3}', ['\u{16ebe}', '\u{0}', '\u{0}']), + ('\u{16ea4}', ['\u{16ebf}', '\u{0}', '\u{0}']), + ('\u{16ea5}', ['\u{16ec0}', '\u{0}', '\u{0}']), + ('\u{16ea6}', ['\u{16ec1}', '\u{0}', '\u{0}']), + ('\u{16ea7}', ['\u{16ec2}', '\u{0}', '\u{0}']), + ('\u{16ea8}', ['\u{16ec3}', '\u{0}', '\u{0}']), + ('\u{16ea9}', ['\u{16ec4}', '\u{0}', '\u{0}']), + ('\u{16eaa}', ['\u{16ec5}', '\u{0}', '\u{0}']), + ('\u{16eab}', ['\u{16ec6}', '\u{0}', '\u{0}']), + ('\u{16eac}', ['\u{16ec7}', '\u{0}', '\u{0}']), + ('\u{16ead}', ['\u{16ec8}', '\u{0}', '\u{0}']), + ('\u{16eae}', ['\u{16ec9}', '\u{0}', '\u{0}']), + ('\u{16eaf}', ['\u{16eca}', '\u{0}', '\u{0}']), + ('\u{16eb0}', ['\u{16ecb}', '\u{0}', '\u{0}']), + ('\u{16eb1}', ['\u{16ecc}', '\u{0}', '\u{0}']), + ('\u{16eb2}', ['\u{16ecd}', '\u{0}', '\u{0}']), + ('\u{16eb3}', ['\u{16ece}', '\u{0}', '\u{0}']), + ('\u{16eb4}', ['\u{16ecf}', '\u{0}', '\u{0}']), + ('\u{16eb5}', ['\u{16ed0}', '\u{0}', '\u{0}']), + ('\u{16eb6}', ['\u{16ed1}', '\u{0}', '\u{0}']), + ('\u{16eb7}', ['\u{16ed2}', '\u{0}', '\u{0}']), + ('\u{16eb8}', ['\u{16ed3}', '\u{0}', '\u{0}']), + ('\u{1e900}', ['\u{1e922}', '\u{0}', '\u{0}']), + ('\u{1e901}', ['\u{1e923}', '\u{0}', '\u{0}']), + ('\u{1e902}', ['\u{1e924}', '\u{0}', '\u{0}']), + ('\u{1e903}', ['\u{1e925}', '\u{0}', '\u{0}']), + ('\u{1e904}', ['\u{1e926}', '\u{0}', '\u{0}']), + ('\u{1e905}', ['\u{1e927}', '\u{0}', '\u{0}']), + ('\u{1e906}', ['\u{1e928}', '\u{0}', '\u{0}']), + ('\u{1e907}', ['\u{1e929}', '\u{0}', '\u{0}']), + ('\u{1e908}', ['\u{1e92a}', '\u{0}', '\u{0}']), + ('\u{1e909}', ['\u{1e92b}', '\u{0}', '\u{0}']), + ('\u{1e90a}', ['\u{1e92c}', '\u{0}', '\u{0}']), + ('\u{1e90b}', ['\u{1e92d}', '\u{0}', '\u{0}']), + ('\u{1e90c}', ['\u{1e92e}', '\u{0}', '\u{0}']), + ('\u{1e90d}', ['\u{1e92f}', '\u{0}', '\u{0}']), + ('\u{1e90e}', ['\u{1e930}', 
'\u{0}', '\u{0}']), + ('\u{1e90f}', ['\u{1e931}', '\u{0}', '\u{0}']), + ('\u{1e910}', ['\u{1e932}', '\u{0}', '\u{0}']), + ('\u{1e911}', ['\u{1e933}', '\u{0}', '\u{0}']), + ('\u{1e912}', ['\u{1e934}', '\u{0}', '\u{0}']), + ('\u{1e913}', ['\u{1e935}', '\u{0}', '\u{0}']), + ('\u{1e914}', ['\u{1e936}', '\u{0}', '\u{0}']), + ('\u{1e915}', ['\u{1e937}', '\u{0}', '\u{0}']), + ('\u{1e916}', ['\u{1e938}', '\u{0}', '\u{0}']), + ('\u{1e917}', ['\u{1e939}', '\u{0}', '\u{0}']), + ('\u{1e918}', ['\u{1e93a}', '\u{0}', '\u{0}']), + ('\u{1e919}', ['\u{1e93b}', '\u{0}', '\u{0}']), + ('\u{1e91a}', ['\u{1e93c}', '\u{0}', '\u{0}']), + ('\u{1e91b}', ['\u{1e93d}', '\u{0}', '\u{0}']), + ('\u{1e91c}', ['\u{1e93e}', '\u{0}', '\u{0}']), + ('\u{1e91d}', ['\u{1e93f}', '\u{0}', '\u{0}']), + ('\u{1e91e}', ['\u{1e940}', '\u{0}', '\u{0}']), + ('\u{1e91f}', ['\u{1e941}', '\u{0}', '\u{0}']), + ('\u{1e920}', ['\u{1e942}', '\u{0}', '\u{0}']), + ('\u{1e921}', ['\u{1e943}', '\u{0}', '\u{0}']), +]; + +#[rustfmt::skip] +pub(super) static TO_UPPER: &[(char, [char; 3]); 1580] = &[ + ('\u{61}', ['\u{41}', '\u{0}', '\u{0}']), ('\u{62}', ['\u{42}', '\u{0}', '\u{0}']), + ('\u{63}', ['\u{43}', '\u{0}', '\u{0}']), ('\u{64}', ['\u{44}', '\u{0}', '\u{0}']), + ('\u{65}', ['\u{45}', '\u{0}', '\u{0}']), ('\u{66}', ['\u{46}', '\u{0}', '\u{0}']), + ('\u{67}', ['\u{47}', '\u{0}', '\u{0}']), ('\u{68}', ['\u{48}', '\u{0}', '\u{0}']), + ('\u{69}', ['\u{49}', '\u{0}', '\u{0}']), ('\u{6a}', ['\u{4a}', '\u{0}', '\u{0}']), + ('\u{6b}', ['\u{4b}', '\u{0}', '\u{0}']), ('\u{6c}', ['\u{4c}', '\u{0}', '\u{0}']), + ('\u{6d}', ['\u{4d}', '\u{0}', '\u{0}']), ('\u{6e}', ['\u{4e}', '\u{0}', '\u{0}']), + ('\u{6f}', ['\u{4f}', '\u{0}', '\u{0}']), ('\u{70}', ['\u{50}', '\u{0}', '\u{0}']), + ('\u{71}', ['\u{51}', '\u{0}', '\u{0}']), ('\u{72}', ['\u{52}', '\u{0}', '\u{0}']), + ('\u{73}', ['\u{53}', '\u{0}', '\u{0}']), ('\u{74}', ['\u{54}', '\u{0}', '\u{0}']), + ('\u{75}', ['\u{55}', '\u{0}', '\u{0}']), ('\u{76}', ['\u{56}', '\u{0}', '\u{0}']), + ('\u{77}', ['\u{57}', '\u{0}', '\u{0}']), ('\u{78}', ['\u{58}', '\u{0}', '\u{0}']), + ('\u{79}', ['\u{59}', '\u{0}', '\u{0}']), ('\u{7a}', ['\u{5a}', '\u{0}', '\u{0}']), + ('\u{b5}', ['\u{39c}', '\u{0}', '\u{0}']), ('\u{df}', ['\u{53}', '\u{53}', '\u{0}']), + ('\u{e0}', ['\u{c0}', '\u{0}', '\u{0}']), ('\u{e1}', ['\u{c1}', '\u{0}', '\u{0}']), + ('\u{e2}', ['\u{c2}', '\u{0}', '\u{0}']), ('\u{e3}', ['\u{c3}', '\u{0}', '\u{0}']), + ('\u{e4}', ['\u{c4}', '\u{0}', '\u{0}']), ('\u{e5}', ['\u{c5}', '\u{0}', '\u{0}']), + ('\u{e6}', ['\u{c6}', '\u{0}', '\u{0}']), ('\u{e7}', ['\u{c7}', '\u{0}', '\u{0}']), + ('\u{e8}', ['\u{c8}', '\u{0}', '\u{0}']), ('\u{e9}', ['\u{c9}', '\u{0}', '\u{0}']), + ('\u{ea}', ['\u{ca}', '\u{0}', '\u{0}']), ('\u{eb}', ['\u{cb}', '\u{0}', '\u{0}']), + ('\u{ec}', ['\u{cc}', '\u{0}', '\u{0}']), ('\u{ed}', ['\u{cd}', '\u{0}', '\u{0}']), + ('\u{ee}', ['\u{ce}', '\u{0}', '\u{0}']), ('\u{ef}', ['\u{cf}', '\u{0}', '\u{0}']), + ('\u{f0}', ['\u{d0}', '\u{0}', '\u{0}']), ('\u{f1}', ['\u{d1}', '\u{0}', '\u{0}']), + ('\u{f2}', ['\u{d2}', '\u{0}', '\u{0}']), ('\u{f3}', ['\u{d3}', '\u{0}', '\u{0}']), + ('\u{f4}', ['\u{d4}', '\u{0}', '\u{0}']), ('\u{f5}', ['\u{d5}', '\u{0}', '\u{0}']), + ('\u{f6}', ['\u{d6}', '\u{0}', '\u{0}']), ('\u{f8}', ['\u{d8}', '\u{0}', '\u{0}']), + ('\u{f9}', ['\u{d9}', '\u{0}', '\u{0}']), ('\u{fa}', ['\u{da}', '\u{0}', '\u{0}']), + ('\u{fb}', ['\u{db}', '\u{0}', '\u{0}']), ('\u{fc}', ['\u{dc}', '\u{0}', '\u{0}']), + ('\u{fd}', ['\u{dd}', '\u{0}', '\u{0}']), ('\u{fe}', ['\u{de}', '\u{0}', '\u{0}']), + 
('\u{ff}', ['\u{178}', '\u{0}', '\u{0}']), ('\u{101}', ['\u{100}', '\u{0}', '\u{0}']), + ('\u{103}', ['\u{102}', '\u{0}', '\u{0}']), ('\u{105}', ['\u{104}', '\u{0}', '\u{0}']), + ('\u{107}', ['\u{106}', '\u{0}', '\u{0}']), ('\u{109}', ['\u{108}', '\u{0}', '\u{0}']), + ('\u{10b}', ['\u{10a}', '\u{0}', '\u{0}']), ('\u{10d}', ['\u{10c}', '\u{0}', '\u{0}']), + ('\u{10f}', ['\u{10e}', '\u{0}', '\u{0}']), ('\u{111}', ['\u{110}', '\u{0}', '\u{0}']), + ('\u{113}', ['\u{112}', '\u{0}', '\u{0}']), ('\u{115}', ['\u{114}', '\u{0}', '\u{0}']), + ('\u{117}', ['\u{116}', '\u{0}', '\u{0}']), ('\u{119}', ['\u{118}', '\u{0}', '\u{0}']), + ('\u{11b}', ['\u{11a}', '\u{0}', '\u{0}']), ('\u{11d}', ['\u{11c}', '\u{0}', '\u{0}']), + ('\u{11f}', ['\u{11e}', '\u{0}', '\u{0}']), ('\u{121}', ['\u{120}', '\u{0}', '\u{0}']), + ('\u{123}', ['\u{122}', '\u{0}', '\u{0}']), ('\u{125}', ['\u{124}', '\u{0}', '\u{0}']), + ('\u{127}', ['\u{126}', '\u{0}', '\u{0}']), ('\u{129}', ['\u{128}', '\u{0}', '\u{0}']), + ('\u{12b}', ['\u{12a}', '\u{0}', '\u{0}']), ('\u{12d}', ['\u{12c}', '\u{0}', '\u{0}']), + ('\u{12f}', ['\u{12e}', '\u{0}', '\u{0}']), ('\u{131}', ['\u{49}', '\u{0}', '\u{0}']), + ('\u{133}', ['\u{132}', '\u{0}', '\u{0}']), ('\u{135}', ['\u{134}', '\u{0}', '\u{0}']), + ('\u{137}', ['\u{136}', '\u{0}', '\u{0}']), ('\u{13a}', ['\u{139}', '\u{0}', '\u{0}']), + ('\u{13c}', ['\u{13b}', '\u{0}', '\u{0}']), ('\u{13e}', ['\u{13d}', '\u{0}', '\u{0}']), + ('\u{140}', ['\u{13f}', '\u{0}', '\u{0}']), ('\u{142}', ['\u{141}', '\u{0}', '\u{0}']), + ('\u{144}', ['\u{143}', '\u{0}', '\u{0}']), ('\u{146}', ['\u{145}', '\u{0}', '\u{0}']), + ('\u{148}', ['\u{147}', '\u{0}', '\u{0}']), ('\u{149}', ['\u{2bc}', '\u{4e}', '\u{0}']), + ('\u{14b}', ['\u{14a}', '\u{0}', '\u{0}']), ('\u{14d}', ['\u{14c}', '\u{0}', '\u{0}']), + ('\u{14f}', ['\u{14e}', '\u{0}', '\u{0}']), ('\u{151}', ['\u{150}', '\u{0}', '\u{0}']), + ('\u{153}', ['\u{152}', '\u{0}', '\u{0}']), ('\u{155}', ['\u{154}', '\u{0}', '\u{0}']), + ('\u{157}', ['\u{156}', '\u{0}', '\u{0}']), ('\u{159}', ['\u{158}', '\u{0}', '\u{0}']), + ('\u{15b}', ['\u{15a}', '\u{0}', '\u{0}']), ('\u{15d}', ['\u{15c}', '\u{0}', '\u{0}']), + ('\u{15f}', ['\u{15e}', '\u{0}', '\u{0}']), ('\u{161}', ['\u{160}', '\u{0}', '\u{0}']), + ('\u{163}', ['\u{162}', '\u{0}', '\u{0}']), ('\u{165}', ['\u{164}', '\u{0}', '\u{0}']), + ('\u{167}', ['\u{166}', '\u{0}', '\u{0}']), ('\u{169}', ['\u{168}', '\u{0}', '\u{0}']), + ('\u{16b}', ['\u{16a}', '\u{0}', '\u{0}']), ('\u{16d}', ['\u{16c}', '\u{0}', '\u{0}']), + ('\u{16f}', ['\u{16e}', '\u{0}', '\u{0}']), ('\u{171}', ['\u{170}', '\u{0}', '\u{0}']), + ('\u{173}', ['\u{172}', '\u{0}', '\u{0}']), ('\u{175}', ['\u{174}', '\u{0}', '\u{0}']), + ('\u{177}', ['\u{176}', '\u{0}', '\u{0}']), ('\u{17a}', ['\u{179}', '\u{0}', '\u{0}']), + ('\u{17c}', ['\u{17b}', '\u{0}', '\u{0}']), ('\u{17e}', ['\u{17d}', '\u{0}', '\u{0}']), + ('\u{17f}', ['\u{53}', '\u{0}', '\u{0}']), ('\u{180}', ['\u{243}', '\u{0}', '\u{0}']), + ('\u{183}', ['\u{182}', '\u{0}', '\u{0}']), ('\u{185}', ['\u{184}', '\u{0}', '\u{0}']), + ('\u{188}', ['\u{187}', '\u{0}', '\u{0}']), ('\u{18c}', ['\u{18b}', '\u{0}', '\u{0}']), + ('\u{192}', ['\u{191}', '\u{0}', '\u{0}']), ('\u{195}', ['\u{1f6}', '\u{0}', '\u{0}']), + ('\u{199}', ['\u{198}', '\u{0}', '\u{0}']), ('\u{19a}', ['\u{23d}', '\u{0}', '\u{0}']), + ('\u{19b}', ['\u{a7dc}', '\u{0}', '\u{0}']), ('\u{19e}', ['\u{220}', '\u{0}', '\u{0}']), + ('\u{1a1}', ['\u{1a0}', '\u{0}', '\u{0}']), ('\u{1a3}', ['\u{1a2}', '\u{0}', '\u{0}']), + ('\u{1a5}', ['\u{1a4}', '\u{0}', '\u{0}']), 
('\u{1a8}', ['\u{1a7}', '\u{0}', '\u{0}']), + ('\u{1ad}', ['\u{1ac}', '\u{0}', '\u{0}']), ('\u{1b0}', ['\u{1af}', '\u{0}', '\u{0}']), + ('\u{1b4}', ['\u{1b3}', '\u{0}', '\u{0}']), ('\u{1b6}', ['\u{1b5}', '\u{0}', '\u{0}']), + ('\u{1b9}', ['\u{1b8}', '\u{0}', '\u{0}']), ('\u{1bd}', ['\u{1bc}', '\u{0}', '\u{0}']), + ('\u{1bf}', ['\u{1f7}', '\u{0}', '\u{0}']), ('\u{1c5}', ['\u{1c4}', '\u{0}', '\u{0}']), + ('\u{1c6}', ['\u{1c4}', '\u{0}', '\u{0}']), ('\u{1c8}', ['\u{1c7}', '\u{0}', '\u{0}']), + ('\u{1c9}', ['\u{1c7}', '\u{0}', '\u{0}']), ('\u{1cb}', ['\u{1ca}', '\u{0}', '\u{0}']), + ('\u{1cc}', ['\u{1ca}', '\u{0}', '\u{0}']), ('\u{1ce}', ['\u{1cd}', '\u{0}', '\u{0}']), + ('\u{1d0}', ['\u{1cf}', '\u{0}', '\u{0}']), ('\u{1d2}', ['\u{1d1}', '\u{0}', '\u{0}']), + ('\u{1d4}', ['\u{1d3}', '\u{0}', '\u{0}']), ('\u{1d6}', ['\u{1d5}', '\u{0}', '\u{0}']), + ('\u{1d8}', ['\u{1d7}', '\u{0}', '\u{0}']), ('\u{1da}', ['\u{1d9}', '\u{0}', '\u{0}']), + ('\u{1dc}', ['\u{1db}', '\u{0}', '\u{0}']), ('\u{1dd}', ['\u{18e}', '\u{0}', '\u{0}']), + ('\u{1df}', ['\u{1de}', '\u{0}', '\u{0}']), ('\u{1e1}', ['\u{1e0}', '\u{0}', '\u{0}']), + ('\u{1e3}', ['\u{1e2}', '\u{0}', '\u{0}']), ('\u{1e5}', ['\u{1e4}', '\u{0}', '\u{0}']), + ('\u{1e7}', ['\u{1e6}', '\u{0}', '\u{0}']), ('\u{1e9}', ['\u{1e8}', '\u{0}', '\u{0}']), + ('\u{1eb}', ['\u{1ea}', '\u{0}', '\u{0}']), ('\u{1ed}', ['\u{1ec}', '\u{0}', '\u{0}']), + ('\u{1ef}', ['\u{1ee}', '\u{0}', '\u{0}']), ('\u{1f0}', ['\u{4a}', '\u{30c}', '\u{0}']), + ('\u{1f2}', ['\u{1f1}', '\u{0}', '\u{0}']), ('\u{1f3}', ['\u{1f1}', '\u{0}', '\u{0}']), + ('\u{1f5}', ['\u{1f4}', '\u{0}', '\u{0}']), ('\u{1f9}', ['\u{1f8}', '\u{0}', '\u{0}']), + ('\u{1fb}', ['\u{1fa}', '\u{0}', '\u{0}']), ('\u{1fd}', ['\u{1fc}', '\u{0}', '\u{0}']), + ('\u{1ff}', ['\u{1fe}', '\u{0}', '\u{0}']), ('\u{201}', ['\u{200}', '\u{0}', '\u{0}']), + ('\u{203}', ['\u{202}', '\u{0}', '\u{0}']), ('\u{205}', ['\u{204}', '\u{0}', '\u{0}']), + ('\u{207}', ['\u{206}', '\u{0}', '\u{0}']), ('\u{209}', ['\u{208}', '\u{0}', '\u{0}']), + ('\u{20b}', ['\u{20a}', '\u{0}', '\u{0}']), ('\u{20d}', ['\u{20c}', '\u{0}', '\u{0}']), + ('\u{20f}', ['\u{20e}', '\u{0}', '\u{0}']), ('\u{211}', ['\u{210}', '\u{0}', '\u{0}']), + ('\u{213}', ['\u{212}', '\u{0}', '\u{0}']), ('\u{215}', ['\u{214}', '\u{0}', '\u{0}']), + ('\u{217}', ['\u{216}', '\u{0}', '\u{0}']), ('\u{219}', ['\u{218}', '\u{0}', '\u{0}']), + ('\u{21b}', ['\u{21a}', '\u{0}', '\u{0}']), ('\u{21d}', ['\u{21c}', '\u{0}', '\u{0}']), + ('\u{21f}', ['\u{21e}', '\u{0}', '\u{0}']), ('\u{223}', ['\u{222}', '\u{0}', '\u{0}']), + ('\u{225}', ['\u{224}', '\u{0}', '\u{0}']), ('\u{227}', ['\u{226}', '\u{0}', '\u{0}']), + ('\u{229}', ['\u{228}', '\u{0}', '\u{0}']), ('\u{22b}', ['\u{22a}', '\u{0}', '\u{0}']), + ('\u{22d}', ['\u{22c}', '\u{0}', '\u{0}']), ('\u{22f}', ['\u{22e}', '\u{0}', '\u{0}']), + ('\u{231}', ['\u{230}', '\u{0}', '\u{0}']), ('\u{233}', ['\u{232}', '\u{0}', '\u{0}']), + ('\u{23c}', ['\u{23b}', '\u{0}', '\u{0}']), ('\u{23f}', ['\u{2c7e}', '\u{0}', '\u{0}']), + ('\u{240}', ['\u{2c7f}', '\u{0}', '\u{0}']), ('\u{242}', ['\u{241}', '\u{0}', '\u{0}']), + ('\u{247}', ['\u{246}', '\u{0}', '\u{0}']), ('\u{249}', ['\u{248}', '\u{0}', '\u{0}']), + ('\u{24b}', ['\u{24a}', '\u{0}', '\u{0}']), ('\u{24d}', ['\u{24c}', '\u{0}', '\u{0}']), + ('\u{24f}', ['\u{24e}', '\u{0}', '\u{0}']), ('\u{250}', ['\u{2c6f}', '\u{0}', '\u{0}']), + ('\u{251}', ['\u{2c6d}', '\u{0}', '\u{0}']), ('\u{252}', ['\u{2c70}', '\u{0}', '\u{0}']), + ('\u{253}', ['\u{181}', '\u{0}', '\u{0}']), ('\u{254}', ['\u{186}', '\u{0}', 
'\u{0}']), + ('\u{256}', ['\u{189}', '\u{0}', '\u{0}']), ('\u{257}', ['\u{18a}', '\u{0}', '\u{0}']), + ('\u{259}', ['\u{18f}', '\u{0}', '\u{0}']), ('\u{25b}', ['\u{190}', '\u{0}', '\u{0}']), + ('\u{25c}', ['\u{a7ab}', '\u{0}', '\u{0}']), ('\u{260}', ['\u{193}', '\u{0}', '\u{0}']), + ('\u{261}', ['\u{a7ac}', '\u{0}', '\u{0}']), ('\u{263}', ['\u{194}', '\u{0}', '\u{0}']), + ('\u{264}', ['\u{a7cb}', '\u{0}', '\u{0}']), ('\u{265}', ['\u{a78d}', '\u{0}', '\u{0}']), + ('\u{266}', ['\u{a7aa}', '\u{0}', '\u{0}']), ('\u{268}', ['\u{197}', '\u{0}', '\u{0}']), + ('\u{269}', ['\u{196}', '\u{0}', '\u{0}']), ('\u{26a}', ['\u{a7ae}', '\u{0}', '\u{0}']), + ('\u{26b}', ['\u{2c62}', '\u{0}', '\u{0}']), ('\u{26c}', ['\u{a7ad}', '\u{0}', '\u{0}']), + ('\u{26f}', ['\u{19c}', '\u{0}', '\u{0}']), ('\u{271}', ['\u{2c6e}', '\u{0}', '\u{0}']), + ('\u{272}', ['\u{19d}', '\u{0}', '\u{0}']), ('\u{275}', ['\u{19f}', '\u{0}', '\u{0}']), + ('\u{27d}', ['\u{2c64}', '\u{0}', '\u{0}']), ('\u{280}', ['\u{1a6}', '\u{0}', '\u{0}']), + ('\u{282}', ['\u{a7c5}', '\u{0}', '\u{0}']), ('\u{283}', ['\u{1a9}', '\u{0}', '\u{0}']), + ('\u{287}', ['\u{a7b1}', '\u{0}', '\u{0}']), ('\u{288}', ['\u{1ae}', '\u{0}', '\u{0}']), + ('\u{289}', ['\u{244}', '\u{0}', '\u{0}']), ('\u{28a}', ['\u{1b1}', '\u{0}', '\u{0}']), + ('\u{28b}', ['\u{1b2}', '\u{0}', '\u{0}']), ('\u{28c}', ['\u{245}', '\u{0}', '\u{0}']), + ('\u{292}', ['\u{1b7}', '\u{0}', '\u{0}']), ('\u{29d}', ['\u{a7b2}', '\u{0}', '\u{0}']), + ('\u{29e}', ['\u{a7b0}', '\u{0}', '\u{0}']), ('\u{345}', ['\u{399}', '\u{0}', '\u{0}']), + ('\u{371}', ['\u{370}', '\u{0}', '\u{0}']), ('\u{373}', ['\u{372}', '\u{0}', '\u{0}']), + ('\u{377}', ['\u{376}', '\u{0}', '\u{0}']), ('\u{37b}', ['\u{3fd}', '\u{0}', '\u{0}']), + ('\u{37c}', ['\u{3fe}', '\u{0}', '\u{0}']), ('\u{37d}', ['\u{3ff}', '\u{0}', '\u{0}']), + ('\u{390}', ['\u{399}', '\u{308}', '\u{301}']), ('\u{3ac}', ['\u{386}', '\u{0}', '\u{0}']), + ('\u{3ad}', ['\u{388}', '\u{0}', '\u{0}']), ('\u{3ae}', ['\u{389}', '\u{0}', '\u{0}']), + ('\u{3af}', ['\u{38a}', '\u{0}', '\u{0}']), ('\u{3b0}', ['\u{3a5}', '\u{308}', '\u{301}']), + ('\u{3b1}', ['\u{391}', '\u{0}', '\u{0}']), ('\u{3b2}', ['\u{392}', '\u{0}', '\u{0}']), + ('\u{3b3}', ['\u{393}', '\u{0}', '\u{0}']), ('\u{3b4}', ['\u{394}', '\u{0}', '\u{0}']), + ('\u{3b5}', ['\u{395}', '\u{0}', '\u{0}']), ('\u{3b6}', ['\u{396}', '\u{0}', '\u{0}']), + ('\u{3b7}', ['\u{397}', '\u{0}', '\u{0}']), ('\u{3b8}', ['\u{398}', '\u{0}', '\u{0}']), + ('\u{3b9}', ['\u{399}', '\u{0}', '\u{0}']), ('\u{3ba}', ['\u{39a}', '\u{0}', '\u{0}']), + ('\u{3bb}', ['\u{39b}', '\u{0}', '\u{0}']), ('\u{3bc}', ['\u{39c}', '\u{0}', '\u{0}']), + ('\u{3bd}', ['\u{39d}', '\u{0}', '\u{0}']), ('\u{3be}', ['\u{39e}', '\u{0}', '\u{0}']), + ('\u{3bf}', ['\u{39f}', '\u{0}', '\u{0}']), ('\u{3c0}', ['\u{3a0}', '\u{0}', '\u{0}']), + ('\u{3c1}', ['\u{3a1}', '\u{0}', '\u{0}']), ('\u{3c2}', ['\u{3a3}', '\u{0}', '\u{0}']), + ('\u{3c3}', ['\u{3a3}', '\u{0}', '\u{0}']), ('\u{3c4}', ['\u{3a4}', '\u{0}', '\u{0}']), + ('\u{3c5}', ['\u{3a5}', '\u{0}', '\u{0}']), ('\u{3c6}', ['\u{3a6}', '\u{0}', '\u{0}']), + ('\u{3c7}', ['\u{3a7}', '\u{0}', '\u{0}']), ('\u{3c8}', ['\u{3a8}', '\u{0}', '\u{0}']), + ('\u{3c9}', ['\u{3a9}', '\u{0}', '\u{0}']), ('\u{3ca}', ['\u{3aa}', '\u{0}', '\u{0}']), + ('\u{3cb}', ['\u{3ab}', '\u{0}', '\u{0}']), ('\u{3cc}', ['\u{38c}', '\u{0}', '\u{0}']), + ('\u{3cd}', ['\u{38e}', '\u{0}', '\u{0}']), ('\u{3ce}', ['\u{38f}', '\u{0}', '\u{0}']), + ('\u{3d0}', ['\u{392}', '\u{0}', '\u{0}']), ('\u{3d1}', ['\u{398}', '\u{0}', '\u{0}']), + 
('\u{3d5}', ['\u{3a6}', '\u{0}', '\u{0}']), ('\u{3d6}', ['\u{3a0}', '\u{0}', '\u{0}']), + ('\u{3d7}', ['\u{3cf}', '\u{0}', '\u{0}']), ('\u{3d9}', ['\u{3d8}', '\u{0}', '\u{0}']), + ('\u{3db}', ['\u{3da}', '\u{0}', '\u{0}']), ('\u{3dd}', ['\u{3dc}', '\u{0}', '\u{0}']), + ('\u{3df}', ['\u{3de}', '\u{0}', '\u{0}']), ('\u{3e1}', ['\u{3e0}', '\u{0}', '\u{0}']), + ('\u{3e3}', ['\u{3e2}', '\u{0}', '\u{0}']), ('\u{3e5}', ['\u{3e4}', '\u{0}', '\u{0}']), + ('\u{3e7}', ['\u{3e6}', '\u{0}', '\u{0}']), ('\u{3e9}', ['\u{3e8}', '\u{0}', '\u{0}']), + ('\u{3eb}', ['\u{3ea}', '\u{0}', '\u{0}']), ('\u{3ed}', ['\u{3ec}', '\u{0}', '\u{0}']), + ('\u{3ef}', ['\u{3ee}', '\u{0}', '\u{0}']), ('\u{3f0}', ['\u{39a}', '\u{0}', '\u{0}']), + ('\u{3f1}', ['\u{3a1}', '\u{0}', '\u{0}']), ('\u{3f2}', ['\u{3f9}', '\u{0}', '\u{0}']), + ('\u{3f3}', ['\u{37f}', '\u{0}', '\u{0}']), ('\u{3f5}', ['\u{395}', '\u{0}', '\u{0}']), + ('\u{3f8}', ['\u{3f7}', '\u{0}', '\u{0}']), ('\u{3fb}', ['\u{3fa}', '\u{0}', '\u{0}']), + ('\u{430}', ['\u{410}', '\u{0}', '\u{0}']), ('\u{431}', ['\u{411}', '\u{0}', '\u{0}']), + ('\u{432}', ['\u{412}', '\u{0}', '\u{0}']), ('\u{433}', ['\u{413}', '\u{0}', '\u{0}']), + ('\u{434}', ['\u{414}', '\u{0}', '\u{0}']), ('\u{435}', ['\u{415}', '\u{0}', '\u{0}']), + ('\u{436}', ['\u{416}', '\u{0}', '\u{0}']), ('\u{437}', ['\u{417}', '\u{0}', '\u{0}']), + ('\u{438}', ['\u{418}', '\u{0}', '\u{0}']), ('\u{439}', ['\u{419}', '\u{0}', '\u{0}']), + ('\u{43a}', ['\u{41a}', '\u{0}', '\u{0}']), ('\u{43b}', ['\u{41b}', '\u{0}', '\u{0}']), + ('\u{43c}', ['\u{41c}', '\u{0}', '\u{0}']), ('\u{43d}', ['\u{41d}', '\u{0}', '\u{0}']), + ('\u{43e}', ['\u{41e}', '\u{0}', '\u{0}']), ('\u{43f}', ['\u{41f}', '\u{0}', '\u{0}']), + ('\u{440}', ['\u{420}', '\u{0}', '\u{0}']), ('\u{441}', ['\u{421}', '\u{0}', '\u{0}']), + ('\u{442}', ['\u{422}', '\u{0}', '\u{0}']), ('\u{443}', ['\u{423}', '\u{0}', '\u{0}']), + ('\u{444}', ['\u{424}', '\u{0}', '\u{0}']), ('\u{445}', ['\u{425}', '\u{0}', '\u{0}']), + ('\u{446}', ['\u{426}', '\u{0}', '\u{0}']), ('\u{447}', ['\u{427}', '\u{0}', '\u{0}']), + ('\u{448}', ['\u{428}', '\u{0}', '\u{0}']), ('\u{449}', ['\u{429}', '\u{0}', '\u{0}']), + ('\u{44a}', ['\u{42a}', '\u{0}', '\u{0}']), ('\u{44b}', ['\u{42b}', '\u{0}', '\u{0}']), + ('\u{44c}', ['\u{42c}', '\u{0}', '\u{0}']), ('\u{44d}', ['\u{42d}', '\u{0}', '\u{0}']), + ('\u{44e}', ['\u{42e}', '\u{0}', '\u{0}']), ('\u{44f}', ['\u{42f}', '\u{0}', '\u{0}']), + ('\u{450}', ['\u{400}', '\u{0}', '\u{0}']), ('\u{451}', ['\u{401}', '\u{0}', '\u{0}']), + ('\u{452}', ['\u{402}', '\u{0}', '\u{0}']), ('\u{453}', ['\u{403}', '\u{0}', '\u{0}']), + ('\u{454}', ['\u{404}', '\u{0}', '\u{0}']), ('\u{455}', ['\u{405}', '\u{0}', '\u{0}']), + ('\u{456}', ['\u{406}', '\u{0}', '\u{0}']), ('\u{457}', ['\u{407}', '\u{0}', '\u{0}']), + ('\u{458}', ['\u{408}', '\u{0}', '\u{0}']), ('\u{459}', ['\u{409}', '\u{0}', '\u{0}']), + ('\u{45a}', ['\u{40a}', '\u{0}', '\u{0}']), ('\u{45b}', ['\u{40b}', '\u{0}', '\u{0}']), + ('\u{45c}', ['\u{40c}', '\u{0}', '\u{0}']), ('\u{45d}', ['\u{40d}', '\u{0}', '\u{0}']), + ('\u{45e}', ['\u{40e}', '\u{0}', '\u{0}']), ('\u{45f}', ['\u{40f}', '\u{0}', '\u{0}']), + ('\u{461}', ['\u{460}', '\u{0}', '\u{0}']), ('\u{463}', ['\u{462}', '\u{0}', '\u{0}']), + ('\u{465}', ['\u{464}', '\u{0}', '\u{0}']), ('\u{467}', ['\u{466}', '\u{0}', '\u{0}']), + ('\u{469}', ['\u{468}', '\u{0}', '\u{0}']), ('\u{46b}', ['\u{46a}', '\u{0}', '\u{0}']), + ('\u{46d}', ['\u{46c}', '\u{0}', '\u{0}']), ('\u{46f}', ['\u{46e}', '\u{0}', '\u{0}']), + ('\u{471}', ['\u{470}', '\u{0}', '\u{0}']), 
('\u{473}', ['\u{472}', '\u{0}', '\u{0}']), + ('\u{475}', ['\u{474}', '\u{0}', '\u{0}']), ('\u{477}', ['\u{476}', '\u{0}', '\u{0}']), + ('\u{479}', ['\u{478}', '\u{0}', '\u{0}']), ('\u{47b}', ['\u{47a}', '\u{0}', '\u{0}']), + ('\u{47d}', ['\u{47c}', '\u{0}', '\u{0}']), ('\u{47f}', ['\u{47e}', '\u{0}', '\u{0}']), + ('\u{481}', ['\u{480}', '\u{0}', '\u{0}']), ('\u{48b}', ['\u{48a}', '\u{0}', '\u{0}']), + ('\u{48d}', ['\u{48c}', '\u{0}', '\u{0}']), ('\u{48f}', ['\u{48e}', '\u{0}', '\u{0}']), + ('\u{491}', ['\u{490}', '\u{0}', '\u{0}']), ('\u{493}', ['\u{492}', '\u{0}', '\u{0}']), + ('\u{495}', ['\u{494}', '\u{0}', '\u{0}']), ('\u{497}', ['\u{496}', '\u{0}', '\u{0}']), + ('\u{499}', ['\u{498}', '\u{0}', '\u{0}']), ('\u{49b}', ['\u{49a}', '\u{0}', '\u{0}']), + ('\u{49d}', ['\u{49c}', '\u{0}', '\u{0}']), ('\u{49f}', ['\u{49e}', '\u{0}', '\u{0}']), + ('\u{4a1}', ['\u{4a0}', '\u{0}', '\u{0}']), ('\u{4a3}', ['\u{4a2}', '\u{0}', '\u{0}']), + ('\u{4a5}', ['\u{4a4}', '\u{0}', '\u{0}']), ('\u{4a7}', ['\u{4a6}', '\u{0}', '\u{0}']), + ('\u{4a9}', ['\u{4a8}', '\u{0}', '\u{0}']), ('\u{4ab}', ['\u{4aa}', '\u{0}', '\u{0}']), + ('\u{4ad}', ['\u{4ac}', '\u{0}', '\u{0}']), ('\u{4af}', ['\u{4ae}', '\u{0}', '\u{0}']), + ('\u{4b1}', ['\u{4b0}', '\u{0}', '\u{0}']), ('\u{4b3}', ['\u{4b2}', '\u{0}', '\u{0}']), + ('\u{4b5}', ['\u{4b4}', '\u{0}', '\u{0}']), ('\u{4b7}', ['\u{4b6}', '\u{0}', '\u{0}']), + ('\u{4b9}', ['\u{4b8}', '\u{0}', '\u{0}']), ('\u{4bb}', ['\u{4ba}', '\u{0}', '\u{0}']), + ('\u{4bd}', ['\u{4bc}', '\u{0}', '\u{0}']), ('\u{4bf}', ['\u{4be}', '\u{0}', '\u{0}']), + ('\u{4c2}', ['\u{4c1}', '\u{0}', '\u{0}']), ('\u{4c4}', ['\u{4c3}', '\u{0}', '\u{0}']), + ('\u{4c6}', ['\u{4c5}', '\u{0}', '\u{0}']), ('\u{4c8}', ['\u{4c7}', '\u{0}', '\u{0}']), + ('\u{4ca}', ['\u{4c9}', '\u{0}', '\u{0}']), ('\u{4cc}', ['\u{4cb}', '\u{0}', '\u{0}']), + ('\u{4ce}', ['\u{4cd}', '\u{0}', '\u{0}']), ('\u{4cf}', ['\u{4c0}', '\u{0}', '\u{0}']), + ('\u{4d1}', ['\u{4d0}', '\u{0}', '\u{0}']), ('\u{4d3}', ['\u{4d2}', '\u{0}', '\u{0}']), + ('\u{4d5}', ['\u{4d4}', '\u{0}', '\u{0}']), ('\u{4d7}', ['\u{4d6}', '\u{0}', '\u{0}']), + ('\u{4d9}', ['\u{4d8}', '\u{0}', '\u{0}']), ('\u{4db}', ['\u{4da}', '\u{0}', '\u{0}']), + ('\u{4dd}', ['\u{4dc}', '\u{0}', '\u{0}']), ('\u{4df}', ['\u{4de}', '\u{0}', '\u{0}']), + ('\u{4e1}', ['\u{4e0}', '\u{0}', '\u{0}']), ('\u{4e3}', ['\u{4e2}', '\u{0}', '\u{0}']), + ('\u{4e5}', ['\u{4e4}', '\u{0}', '\u{0}']), ('\u{4e7}', ['\u{4e6}', '\u{0}', '\u{0}']), + ('\u{4e9}', ['\u{4e8}', '\u{0}', '\u{0}']), ('\u{4eb}', ['\u{4ea}', '\u{0}', '\u{0}']), + ('\u{4ed}', ['\u{4ec}', '\u{0}', '\u{0}']), ('\u{4ef}', ['\u{4ee}', '\u{0}', '\u{0}']), + ('\u{4f1}', ['\u{4f0}', '\u{0}', '\u{0}']), ('\u{4f3}', ['\u{4f2}', '\u{0}', '\u{0}']), + ('\u{4f5}', ['\u{4f4}', '\u{0}', '\u{0}']), ('\u{4f7}', ['\u{4f6}', '\u{0}', '\u{0}']), + ('\u{4f9}', ['\u{4f8}', '\u{0}', '\u{0}']), ('\u{4fb}', ['\u{4fa}', '\u{0}', '\u{0}']), + ('\u{4fd}', ['\u{4fc}', '\u{0}', '\u{0}']), ('\u{4ff}', ['\u{4fe}', '\u{0}', '\u{0}']), + ('\u{501}', ['\u{500}', '\u{0}', '\u{0}']), ('\u{503}', ['\u{502}', '\u{0}', '\u{0}']), + ('\u{505}', ['\u{504}', '\u{0}', '\u{0}']), ('\u{507}', ['\u{506}', '\u{0}', '\u{0}']), + ('\u{509}', ['\u{508}', '\u{0}', '\u{0}']), ('\u{50b}', ['\u{50a}', '\u{0}', '\u{0}']), + ('\u{50d}', ['\u{50c}', '\u{0}', '\u{0}']), ('\u{50f}', ['\u{50e}', '\u{0}', '\u{0}']), + ('\u{511}', ['\u{510}', '\u{0}', '\u{0}']), ('\u{513}', ['\u{512}', '\u{0}', '\u{0}']), + ('\u{515}', ['\u{514}', '\u{0}', '\u{0}']), ('\u{517}', ['\u{516}', '\u{0}', '\u{0}']), 
+ ('\u{519}', ['\u{518}', '\u{0}', '\u{0}']), ('\u{51b}', ['\u{51a}', '\u{0}', '\u{0}']), + ('\u{51d}', ['\u{51c}', '\u{0}', '\u{0}']), ('\u{51f}', ['\u{51e}', '\u{0}', '\u{0}']), + ('\u{521}', ['\u{520}', '\u{0}', '\u{0}']), ('\u{523}', ['\u{522}', '\u{0}', '\u{0}']), + ('\u{525}', ['\u{524}', '\u{0}', '\u{0}']), ('\u{527}', ['\u{526}', '\u{0}', '\u{0}']), + ('\u{529}', ['\u{528}', '\u{0}', '\u{0}']), ('\u{52b}', ['\u{52a}', '\u{0}', '\u{0}']), + ('\u{52d}', ['\u{52c}', '\u{0}', '\u{0}']), ('\u{52f}', ['\u{52e}', '\u{0}', '\u{0}']), + ('\u{561}', ['\u{531}', '\u{0}', '\u{0}']), ('\u{562}', ['\u{532}', '\u{0}', '\u{0}']), + ('\u{563}', ['\u{533}', '\u{0}', '\u{0}']), ('\u{564}', ['\u{534}', '\u{0}', '\u{0}']), + ('\u{565}', ['\u{535}', '\u{0}', '\u{0}']), ('\u{566}', ['\u{536}', '\u{0}', '\u{0}']), + ('\u{567}', ['\u{537}', '\u{0}', '\u{0}']), ('\u{568}', ['\u{538}', '\u{0}', '\u{0}']), + ('\u{569}', ['\u{539}', '\u{0}', '\u{0}']), ('\u{56a}', ['\u{53a}', '\u{0}', '\u{0}']), + ('\u{56b}', ['\u{53b}', '\u{0}', '\u{0}']), ('\u{56c}', ['\u{53c}', '\u{0}', '\u{0}']), + ('\u{56d}', ['\u{53d}', '\u{0}', '\u{0}']), ('\u{56e}', ['\u{53e}', '\u{0}', '\u{0}']), + ('\u{56f}', ['\u{53f}', '\u{0}', '\u{0}']), ('\u{570}', ['\u{540}', '\u{0}', '\u{0}']), + ('\u{571}', ['\u{541}', '\u{0}', '\u{0}']), ('\u{572}', ['\u{542}', '\u{0}', '\u{0}']), + ('\u{573}', ['\u{543}', '\u{0}', '\u{0}']), ('\u{574}', ['\u{544}', '\u{0}', '\u{0}']), + ('\u{575}', ['\u{545}', '\u{0}', '\u{0}']), ('\u{576}', ['\u{546}', '\u{0}', '\u{0}']), + ('\u{577}', ['\u{547}', '\u{0}', '\u{0}']), ('\u{578}', ['\u{548}', '\u{0}', '\u{0}']), + ('\u{579}', ['\u{549}', '\u{0}', '\u{0}']), ('\u{57a}', ['\u{54a}', '\u{0}', '\u{0}']), + ('\u{57b}', ['\u{54b}', '\u{0}', '\u{0}']), ('\u{57c}', ['\u{54c}', '\u{0}', '\u{0}']), + ('\u{57d}', ['\u{54d}', '\u{0}', '\u{0}']), ('\u{57e}', ['\u{54e}', '\u{0}', '\u{0}']), + ('\u{57f}', ['\u{54f}', '\u{0}', '\u{0}']), ('\u{580}', ['\u{550}', '\u{0}', '\u{0}']), + ('\u{581}', ['\u{551}', '\u{0}', '\u{0}']), ('\u{582}', ['\u{552}', '\u{0}', '\u{0}']), + ('\u{583}', ['\u{553}', '\u{0}', '\u{0}']), ('\u{584}', ['\u{554}', '\u{0}', '\u{0}']), + ('\u{585}', ['\u{555}', '\u{0}', '\u{0}']), ('\u{586}', ['\u{556}', '\u{0}', '\u{0}']), + ('\u{587}', ['\u{535}', '\u{552}', '\u{0}']), ('\u{10d0}', ['\u{1c90}', '\u{0}', '\u{0}']), + ('\u{10d1}', ['\u{1c91}', '\u{0}', '\u{0}']), ('\u{10d2}', ['\u{1c92}', '\u{0}', '\u{0}']), + ('\u{10d3}', ['\u{1c93}', '\u{0}', '\u{0}']), ('\u{10d4}', ['\u{1c94}', '\u{0}', '\u{0}']), + ('\u{10d5}', ['\u{1c95}', '\u{0}', '\u{0}']), ('\u{10d6}', ['\u{1c96}', '\u{0}', '\u{0}']), + ('\u{10d7}', ['\u{1c97}', '\u{0}', '\u{0}']), ('\u{10d8}', ['\u{1c98}', '\u{0}', '\u{0}']), + ('\u{10d9}', ['\u{1c99}', '\u{0}', '\u{0}']), ('\u{10da}', ['\u{1c9a}', '\u{0}', '\u{0}']), + ('\u{10db}', ['\u{1c9b}', '\u{0}', '\u{0}']), ('\u{10dc}', ['\u{1c9c}', '\u{0}', '\u{0}']), + ('\u{10dd}', ['\u{1c9d}', '\u{0}', '\u{0}']), ('\u{10de}', ['\u{1c9e}', '\u{0}', '\u{0}']), + ('\u{10df}', ['\u{1c9f}', '\u{0}', '\u{0}']), ('\u{10e0}', ['\u{1ca0}', '\u{0}', '\u{0}']), + ('\u{10e1}', ['\u{1ca1}', '\u{0}', '\u{0}']), ('\u{10e2}', ['\u{1ca2}', '\u{0}', '\u{0}']), + ('\u{10e3}', ['\u{1ca3}', '\u{0}', '\u{0}']), ('\u{10e4}', ['\u{1ca4}', '\u{0}', '\u{0}']), + ('\u{10e5}', ['\u{1ca5}', '\u{0}', '\u{0}']), ('\u{10e6}', ['\u{1ca6}', '\u{0}', '\u{0}']), + ('\u{10e7}', ['\u{1ca7}', '\u{0}', '\u{0}']), ('\u{10e8}', ['\u{1ca8}', '\u{0}', '\u{0}']), + ('\u{10e9}', ['\u{1ca9}', '\u{0}', '\u{0}']), ('\u{10ea}', ['\u{1caa}', '\u{0}', 
'\u{0}']), + ('\u{10eb}', ['\u{1cab}', '\u{0}', '\u{0}']), ('\u{10ec}', ['\u{1cac}', '\u{0}', '\u{0}']), + ('\u{10ed}', ['\u{1cad}', '\u{0}', '\u{0}']), ('\u{10ee}', ['\u{1cae}', '\u{0}', '\u{0}']), + ('\u{10ef}', ['\u{1caf}', '\u{0}', '\u{0}']), ('\u{10f0}', ['\u{1cb0}', '\u{0}', '\u{0}']), + ('\u{10f1}', ['\u{1cb1}', '\u{0}', '\u{0}']), ('\u{10f2}', ['\u{1cb2}', '\u{0}', '\u{0}']), + ('\u{10f3}', ['\u{1cb3}', '\u{0}', '\u{0}']), ('\u{10f4}', ['\u{1cb4}', '\u{0}', '\u{0}']), + ('\u{10f5}', ['\u{1cb5}', '\u{0}', '\u{0}']), ('\u{10f6}', ['\u{1cb6}', '\u{0}', '\u{0}']), + ('\u{10f7}', ['\u{1cb7}', '\u{0}', '\u{0}']), ('\u{10f8}', ['\u{1cb8}', '\u{0}', '\u{0}']), + ('\u{10f9}', ['\u{1cb9}', '\u{0}', '\u{0}']), ('\u{10fa}', ['\u{1cba}', '\u{0}', '\u{0}']), + ('\u{10fd}', ['\u{1cbd}', '\u{0}', '\u{0}']), ('\u{10fe}', ['\u{1cbe}', '\u{0}', '\u{0}']), + ('\u{10ff}', ['\u{1cbf}', '\u{0}', '\u{0}']), ('\u{13f8}', ['\u{13f0}', '\u{0}', '\u{0}']), + ('\u{13f9}', ['\u{13f1}', '\u{0}', '\u{0}']), ('\u{13fa}', ['\u{13f2}', '\u{0}', '\u{0}']), + ('\u{13fb}', ['\u{13f3}', '\u{0}', '\u{0}']), ('\u{13fc}', ['\u{13f4}', '\u{0}', '\u{0}']), + ('\u{13fd}', ['\u{13f5}', '\u{0}', '\u{0}']), ('\u{1c80}', ['\u{412}', '\u{0}', '\u{0}']), + ('\u{1c81}', ['\u{414}', '\u{0}', '\u{0}']), ('\u{1c82}', ['\u{41e}', '\u{0}', '\u{0}']), + ('\u{1c83}', ['\u{421}', '\u{0}', '\u{0}']), ('\u{1c84}', ['\u{422}', '\u{0}', '\u{0}']), + ('\u{1c85}', ['\u{422}', '\u{0}', '\u{0}']), ('\u{1c86}', ['\u{42a}', '\u{0}', '\u{0}']), + ('\u{1c87}', ['\u{462}', '\u{0}', '\u{0}']), ('\u{1c88}', ['\u{a64a}', '\u{0}', '\u{0}']), + ('\u{1c8a}', ['\u{1c89}', '\u{0}', '\u{0}']), ('\u{1d79}', ['\u{a77d}', '\u{0}', '\u{0}']), + ('\u{1d7d}', ['\u{2c63}', '\u{0}', '\u{0}']), ('\u{1d8e}', ['\u{a7c6}', '\u{0}', '\u{0}']), + ('\u{1e01}', ['\u{1e00}', '\u{0}', '\u{0}']), ('\u{1e03}', ['\u{1e02}', '\u{0}', '\u{0}']), + ('\u{1e05}', ['\u{1e04}', '\u{0}', '\u{0}']), ('\u{1e07}', ['\u{1e06}', '\u{0}', '\u{0}']), + ('\u{1e09}', ['\u{1e08}', '\u{0}', '\u{0}']), ('\u{1e0b}', ['\u{1e0a}', '\u{0}', '\u{0}']), + ('\u{1e0d}', ['\u{1e0c}', '\u{0}', '\u{0}']), ('\u{1e0f}', ['\u{1e0e}', '\u{0}', '\u{0}']), + ('\u{1e11}', ['\u{1e10}', '\u{0}', '\u{0}']), ('\u{1e13}', ['\u{1e12}', '\u{0}', '\u{0}']), + ('\u{1e15}', ['\u{1e14}', '\u{0}', '\u{0}']), ('\u{1e17}', ['\u{1e16}', '\u{0}', '\u{0}']), + ('\u{1e19}', ['\u{1e18}', '\u{0}', '\u{0}']), ('\u{1e1b}', ['\u{1e1a}', '\u{0}', '\u{0}']), + ('\u{1e1d}', ['\u{1e1c}', '\u{0}', '\u{0}']), ('\u{1e1f}', ['\u{1e1e}', '\u{0}', '\u{0}']), + ('\u{1e21}', ['\u{1e20}', '\u{0}', '\u{0}']), ('\u{1e23}', ['\u{1e22}', '\u{0}', '\u{0}']), + ('\u{1e25}', ['\u{1e24}', '\u{0}', '\u{0}']), ('\u{1e27}', ['\u{1e26}', '\u{0}', '\u{0}']), + ('\u{1e29}', ['\u{1e28}', '\u{0}', '\u{0}']), ('\u{1e2b}', ['\u{1e2a}', '\u{0}', '\u{0}']), + ('\u{1e2d}', ['\u{1e2c}', '\u{0}', '\u{0}']), ('\u{1e2f}', ['\u{1e2e}', '\u{0}', '\u{0}']), + ('\u{1e31}', ['\u{1e30}', '\u{0}', '\u{0}']), ('\u{1e33}', ['\u{1e32}', '\u{0}', '\u{0}']), + ('\u{1e35}', ['\u{1e34}', '\u{0}', '\u{0}']), ('\u{1e37}', ['\u{1e36}', '\u{0}', '\u{0}']), + ('\u{1e39}', ['\u{1e38}', '\u{0}', '\u{0}']), ('\u{1e3b}', ['\u{1e3a}', '\u{0}', '\u{0}']), + ('\u{1e3d}', ['\u{1e3c}', '\u{0}', '\u{0}']), ('\u{1e3f}', ['\u{1e3e}', '\u{0}', '\u{0}']), + ('\u{1e41}', ['\u{1e40}', '\u{0}', '\u{0}']), ('\u{1e43}', ['\u{1e42}', '\u{0}', '\u{0}']), + ('\u{1e45}', ['\u{1e44}', '\u{0}', '\u{0}']), ('\u{1e47}', ['\u{1e46}', '\u{0}', '\u{0}']), + ('\u{1e49}', ['\u{1e48}', '\u{0}', '\u{0}']), ('\u{1e4b}', ['\u{1e4a}', 
'\u{0}', '\u{0}']), + ('\u{1e4d}', ['\u{1e4c}', '\u{0}', '\u{0}']), ('\u{1e4f}', ['\u{1e4e}', '\u{0}', '\u{0}']), + ('\u{1e51}', ['\u{1e50}', '\u{0}', '\u{0}']), ('\u{1e53}', ['\u{1e52}', '\u{0}', '\u{0}']), + ('\u{1e55}', ['\u{1e54}', '\u{0}', '\u{0}']), ('\u{1e57}', ['\u{1e56}', '\u{0}', '\u{0}']), + ('\u{1e59}', ['\u{1e58}', '\u{0}', '\u{0}']), ('\u{1e5b}', ['\u{1e5a}', '\u{0}', '\u{0}']), + ('\u{1e5d}', ['\u{1e5c}', '\u{0}', '\u{0}']), ('\u{1e5f}', ['\u{1e5e}', '\u{0}', '\u{0}']), + ('\u{1e61}', ['\u{1e60}', '\u{0}', '\u{0}']), ('\u{1e63}', ['\u{1e62}', '\u{0}', '\u{0}']), + ('\u{1e65}', ['\u{1e64}', '\u{0}', '\u{0}']), ('\u{1e67}', ['\u{1e66}', '\u{0}', '\u{0}']), + ('\u{1e69}', ['\u{1e68}', '\u{0}', '\u{0}']), ('\u{1e6b}', ['\u{1e6a}', '\u{0}', '\u{0}']), + ('\u{1e6d}', ['\u{1e6c}', '\u{0}', '\u{0}']), ('\u{1e6f}', ['\u{1e6e}', '\u{0}', '\u{0}']), + ('\u{1e71}', ['\u{1e70}', '\u{0}', '\u{0}']), ('\u{1e73}', ['\u{1e72}', '\u{0}', '\u{0}']), + ('\u{1e75}', ['\u{1e74}', '\u{0}', '\u{0}']), ('\u{1e77}', ['\u{1e76}', '\u{0}', '\u{0}']), + ('\u{1e79}', ['\u{1e78}', '\u{0}', '\u{0}']), ('\u{1e7b}', ['\u{1e7a}', '\u{0}', '\u{0}']), + ('\u{1e7d}', ['\u{1e7c}', '\u{0}', '\u{0}']), ('\u{1e7f}', ['\u{1e7e}', '\u{0}', '\u{0}']), + ('\u{1e81}', ['\u{1e80}', '\u{0}', '\u{0}']), ('\u{1e83}', ['\u{1e82}', '\u{0}', '\u{0}']), + ('\u{1e85}', ['\u{1e84}', '\u{0}', '\u{0}']), ('\u{1e87}', ['\u{1e86}', '\u{0}', '\u{0}']), + ('\u{1e89}', ['\u{1e88}', '\u{0}', '\u{0}']), ('\u{1e8b}', ['\u{1e8a}', '\u{0}', '\u{0}']), + ('\u{1e8d}', ['\u{1e8c}', '\u{0}', '\u{0}']), ('\u{1e8f}', ['\u{1e8e}', '\u{0}', '\u{0}']), + ('\u{1e91}', ['\u{1e90}', '\u{0}', '\u{0}']), ('\u{1e93}', ['\u{1e92}', '\u{0}', '\u{0}']), + ('\u{1e95}', ['\u{1e94}', '\u{0}', '\u{0}']), ('\u{1e96}', ['\u{48}', '\u{331}', '\u{0}']), + ('\u{1e97}', ['\u{54}', '\u{308}', '\u{0}']), ('\u{1e98}', ['\u{57}', '\u{30a}', '\u{0}']), + ('\u{1e99}', ['\u{59}', '\u{30a}', '\u{0}']), ('\u{1e9a}', ['\u{41}', '\u{2be}', '\u{0}']), + ('\u{1e9b}', ['\u{1e60}', '\u{0}', '\u{0}']), ('\u{1ea1}', ['\u{1ea0}', '\u{0}', '\u{0}']), + ('\u{1ea3}', ['\u{1ea2}', '\u{0}', '\u{0}']), ('\u{1ea5}', ['\u{1ea4}', '\u{0}', '\u{0}']), + ('\u{1ea7}', ['\u{1ea6}', '\u{0}', '\u{0}']), ('\u{1ea9}', ['\u{1ea8}', '\u{0}', '\u{0}']), + ('\u{1eab}', ['\u{1eaa}', '\u{0}', '\u{0}']), ('\u{1ead}', ['\u{1eac}', '\u{0}', '\u{0}']), + ('\u{1eaf}', ['\u{1eae}', '\u{0}', '\u{0}']), ('\u{1eb1}', ['\u{1eb0}', '\u{0}', '\u{0}']), + ('\u{1eb3}', ['\u{1eb2}', '\u{0}', '\u{0}']), ('\u{1eb5}', ['\u{1eb4}', '\u{0}', '\u{0}']), + ('\u{1eb7}', ['\u{1eb6}', '\u{0}', '\u{0}']), ('\u{1eb9}', ['\u{1eb8}', '\u{0}', '\u{0}']), + ('\u{1ebb}', ['\u{1eba}', '\u{0}', '\u{0}']), ('\u{1ebd}', ['\u{1ebc}', '\u{0}', '\u{0}']), + ('\u{1ebf}', ['\u{1ebe}', '\u{0}', '\u{0}']), ('\u{1ec1}', ['\u{1ec0}', '\u{0}', '\u{0}']), + ('\u{1ec3}', ['\u{1ec2}', '\u{0}', '\u{0}']), ('\u{1ec5}', ['\u{1ec4}', '\u{0}', '\u{0}']), + ('\u{1ec7}', ['\u{1ec6}', '\u{0}', '\u{0}']), ('\u{1ec9}', ['\u{1ec8}', '\u{0}', '\u{0}']), + ('\u{1ecb}', ['\u{1eca}', '\u{0}', '\u{0}']), ('\u{1ecd}', ['\u{1ecc}', '\u{0}', '\u{0}']), + ('\u{1ecf}', ['\u{1ece}', '\u{0}', '\u{0}']), ('\u{1ed1}', ['\u{1ed0}', '\u{0}', '\u{0}']), + ('\u{1ed3}', ['\u{1ed2}', '\u{0}', '\u{0}']), ('\u{1ed5}', ['\u{1ed4}', '\u{0}', '\u{0}']), + ('\u{1ed7}', ['\u{1ed6}', '\u{0}', '\u{0}']), ('\u{1ed9}', ['\u{1ed8}', '\u{0}', '\u{0}']), + ('\u{1edb}', ['\u{1eda}', '\u{0}', '\u{0}']), ('\u{1edd}', ['\u{1edc}', '\u{0}', '\u{0}']), + ('\u{1edf}', ['\u{1ede}', '\u{0}', '\u{0}']), 
('\u{1ee1}', ['\u{1ee0}', '\u{0}', '\u{0}']), + ('\u{1ee3}', ['\u{1ee2}', '\u{0}', '\u{0}']), ('\u{1ee5}', ['\u{1ee4}', '\u{0}', '\u{0}']), + ('\u{1ee7}', ['\u{1ee6}', '\u{0}', '\u{0}']), ('\u{1ee9}', ['\u{1ee8}', '\u{0}', '\u{0}']), + ('\u{1eeb}', ['\u{1eea}', '\u{0}', '\u{0}']), ('\u{1eed}', ['\u{1eec}', '\u{0}', '\u{0}']), + ('\u{1eef}', ['\u{1eee}', '\u{0}', '\u{0}']), ('\u{1ef1}', ['\u{1ef0}', '\u{0}', '\u{0}']), + ('\u{1ef3}', ['\u{1ef2}', '\u{0}', '\u{0}']), ('\u{1ef5}', ['\u{1ef4}', '\u{0}', '\u{0}']), + ('\u{1ef7}', ['\u{1ef6}', '\u{0}', '\u{0}']), ('\u{1ef9}', ['\u{1ef8}', '\u{0}', '\u{0}']), + ('\u{1efb}', ['\u{1efa}', '\u{0}', '\u{0}']), ('\u{1efd}', ['\u{1efc}', '\u{0}', '\u{0}']), + ('\u{1eff}', ['\u{1efe}', '\u{0}', '\u{0}']), ('\u{1f00}', ['\u{1f08}', '\u{0}', '\u{0}']), + ('\u{1f01}', ['\u{1f09}', '\u{0}', '\u{0}']), ('\u{1f02}', ['\u{1f0a}', '\u{0}', '\u{0}']), + ('\u{1f03}', ['\u{1f0b}', '\u{0}', '\u{0}']), ('\u{1f04}', ['\u{1f0c}', '\u{0}', '\u{0}']), + ('\u{1f05}', ['\u{1f0d}', '\u{0}', '\u{0}']), ('\u{1f06}', ['\u{1f0e}', '\u{0}', '\u{0}']), + ('\u{1f07}', ['\u{1f0f}', '\u{0}', '\u{0}']), ('\u{1f10}', ['\u{1f18}', '\u{0}', '\u{0}']), + ('\u{1f11}', ['\u{1f19}', '\u{0}', '\u{0}']), ('\u{1f12}', ['\u{1f1a}', '\u{0}', '\u{0}']), + ('\u{1f13}', ['\u{1f1b}', '\u{0}', '\u{0}']), ('\u{1f14}', ['\u{1f1c}', '\u{0}', '\u{0}']), + ('\u{1f15}', ['\u{1f1d}', '\u{0}', '\u{0}']), ('\u{1f20}', ['\u{1f28}', '\u{0}', '\u{0}']), + ('\u{1f21}', ['\u{1f29}', '\u{0}', '\u{0}']), ('\u{1f22}', ['\u{1f2a}', '\u{0}', '\u{0}']), + ('\u{1f23}', ['\u{1f2b}', '\u{0}', '\u{0}']), ('\u{1f24}', ['\u{1f2c}', '\u{0}', '\u{0}']), + ('\u{1f25}', ['\u{1f2d}', '\u{0}', '\u{0}']), ('\u{1f26}', ['\u{1f2e}', '\u{0}', '\u{0}']), + ('\u{1f27}', ['\u{1f2f}', '\u{0}', '\u{0}']), ('\u{1f30}', ['\u{1f38}', '\u{0}', '\u{0}']), + ('\u{1f31}', ['\u{1f39}', '\u{0}', '\u{0}']), ('\u{1f32}', ['\u{1f3a}', '\u{0}', '\u{0}']), + ('\u{1f33}', ['\u{1f3b}', '\u{0}', '\u{0}']), ('\u{1f34}', ['\u{1f3c}', '\u{0}', '\u{0}']), + ('\u{1f35}', ['\u{1f3d}', '\u{0}', '\u{0}']), ('\u{1f36}', ['\u{1f3e}', '\u{0}', '\u{0}']), + ('\u{1f37}', ['\u{1f3f}', '\u{0}', '\u{0}']), ('\u{1f40}', ['\u{1f48}', '\u{0}', '\u{0}']), + ('\u{1f41}', ['\u{1f49}', '\u{0}', '\u{0}']), ('\u{1f42}', ['\u{1f4a}', '\u{0}', '\u{0}']), + ('\u{1f43}', ['\u{1f4b}', '\u{0}', '\u{0}']), ('\u{1f44}', ['\u{1f4c}', '\u{0}', '\u{0}']), + ('\u{1f45}', ['\u{1f4d}', '\u{0}', '\u{0}']), ('\u{1f50}', ['\u{3a5}', '\u{313}', '\u{0}']), + ('\u{1f51}', ['\u{1f59}', '\u{0}', '\u{0}']), + ('\u{1f52}', ['\u{3a5}', '\u{313}', '\u{300}']), + ('\u{1f53}', ['\u{1f5b}', '\u{0}', '\u{0}']), + ('\u{1f54}', ['\u{3a5}', '\u{313}', '\u{301}']), + ('\u{1f55}', ['\u{1f5d}', '\u{0}', '\u{0}']), + ('\u{1f56}', ['\u{3a5}', '\u{313}', '\u{342}']), + ('\u{1f57}', ['\u{1f5f}', '\u{0}', '\u{0}']), ('\u{1f60}', ['\u{1f68}', '\u{0}', '\u{0}']), + ('\u{1f61}', ['\u{1f69}', '\u{0}', '\u{0}']), ('\u{1f62}', ['\u{1f6a}', '\u{0}', '\u{0}']), + ('\u{1f63}', ['\u{1f6b}', '\u{0}', '\u{0}']), ('\u{1f64}', ['\u{1f6c}', '\u{0}', '\u{0}']), + ('\u{1f65}', ['\u{1f6d}', '\u{0}', '\u{0}']), ('\u{1f66}', ['\u{1f6e}', '\u{0}', '\u{0}']), + ('\u{1f67}', ['\u{1f6f}', '\u{0}', '\u{0}']), ('\u{1f70}', ['\u{1fba}', '\u{0}', '\u{0}']), + ('\u{1f71}', ['\u{1fbb}', '\u{0}', '\u{0}']), ('\u{1f72}', ['\u{1fc8}', '\u{0}', '\u{0}']), + ('\u{1f73}', ['\u{1fc9}', '\u{0}', '\u{0}']), ('\u{1f74}', ['\u{1fca}', '\u{0}', '\u{0}']), + ('\u{1f75}', ['\u{1fcb}', '\u{0}', '\u{0}']), ('\u{1f76}', ['\u{1fda}', '\u{0}', '\u{0}']), + ('\u{1f77}', 
['\u{1fdb}', '\u{0}', '\u{0}']), ('\u{1f78}', ['\u{1ff8}', '\u{0}', '\u{0}']), + ('\u{1f79}', ['\u{1ff9}', '\u{0}', '\u{0}']), ('\u{1f7a}', ['\u{1fea}', '\u{0}', '\u{0}']), + ('\u{1f7b}', ['\u{1feb}', '\u{0}', '\u{0}']), ('\u{1f7c}', ['\u{1ffa}', '\u{0}', '\u{0}']), + ('\u{1f7d}', ['\u{1ffb}', '\u{0}', '\u{0}']), + ('\u{1f80}', ['\u{1f08}', '\u{399}', '\u{0}']), + ('\u{1f81}', ['\u{1f09}', '\u{399}', '\u{0}']), + ('\u{1f82}', ['\u{1f0a}', '\u{399}', '\u{0}']), + ('\u{1f83}', ['\u{1f0b}', '\u{399}', '\u{0}']), + ('\u{1f84}', ['\u{1f0c}', '\u{399}', '\u{0}']), + ('\u{1f85}', ['\u{1f0d}', '\u{399}', '\u{0}']), + ('\u{1f86}', ['\u{1f0e}', '\u{399}', '\u{0}']), + ('\u{1f87}', ['\u{1f0f}', '\u{399}', '\u{0}']), + ('\u{1f88}', ['\u{1f08}', '\u{399}', '\u{0}']), + ('\u{1f89}', ['\u{1f09}', '\u{399}', '\u{0}']), + ('\u{1f8a}', ['\u{1f0a}', '\u{399}', '\u{0}']), + ('\u{1f8b}', ['\u{1f0b}', '\u{399}', '\u{0}']), + ('\u{1f8c}', ['\u{1f0c}', '\u{399}', '\u{0}']), + ('\u{1f8d}', ['\u{1f0d}', '\u{399}', '\u{0}']), + ('\u{1f8e}', ['\u{1f0e}', '\u{399}', '\u{0}']), + ('\u{1f8f}', ['\u{1f0f}', '\u{399}', '\u{0}']), + ('\u{1f90}', ['\u{1f28}', '\u{399}', '\u{0}']), + ('\u{1f91}', ['\u{1f29}', '\u{399}', '\u{0}']), + ('\u{1f92}', ['\u{1f2a}', '\u{399}', '\u{0}']), + ('\u{1f93}', ['\u{1f2b}', '\u{399}', '\u{0}']), + ('\u{1f94}', ['\u{1f2c}', '\u{399}', '\u{0}']), + ('\u{1f95}', ['\u{1f2d}', '\u{399}', '\u{0}']), + ('\u{1f96}', ['\u{1f2e}', '\u{399}', '\u{0}']), + ('\u{1f97}', ['\u{1f2f}', '\u{399}', '\u{0}']), + ('\u{1f98}', ['\u{1f28}', '\u{399}', '\u{0}']), + ('\u{1f99}', ['\u{1f29}', '\u{399}', '\u{0}']), + ('\u{1f9a}', ['\u{1f2a}', '\u{399}', '\u{0}']), + ('\u{1f9b}', ['\u{1f2b}', '\u{399}', '\u{0}']), + ('\u{1f9c}', ['\u{1f2c}', '\u{399}', '\u{0}']), + ('\u{1f9d}', ['\u{1f2d}', '\u{399}', '\u{0}']), + ('\u{1f9e}', ['\u{1f2e}', '\u{399}', '\u{0}']), + ('\u{1f9f}', ['\u{1f2f}', '\u{399}', '\u{0}']), + ('\u{1fa0}', ['\u{1f68}', '\u{399}', '\u{0}']), + ('\u{1fa1}', ['\u{1f69}', '\u{399}', '\u{0}']), + ('\u{1fa2}', ['\u{1f6a}', '\u{399}', '\u{0}']), + ('\u{1fa3}', ['\u{1f6b}', '\u{399}', '\u{0}']), + ('\u{1fa4}', ['\u{1f6c}', '\u{399}', '\u{0}']), + ('\u{1fa5}', ['\u{1f6d}', '\u{399}', '\u{0}']), + ('\u{1fa6}', ['\u{1f6e}', '\u{399}', '\u{0}']), + ('\u{1fa7}', ['\u{1f6f}', '\u{399}', '\u{0}']), + ('\u{1fa8}', ['\u{1f68}', '\u{399}', '\u{0}']), + ('\u{1fa9}', ['\u{1f69}', '\u{399}', '\u{0}']), + ('\u{1faa}', ['\u{1f6a}', '\u{399}', '\u{0}']), + ('\u{1fab}', ['\u{1f6b}', '\u{399}', '\u{0}']), + ('\u{1fac}', ['\u{1f6c}', '\u{399}', '\u{0}']), + ('\u{1fad}', ['\u{1f6d}', '\u{399}', '\u{0}']), + ('\u{1fae}', ['\u{1f6e}', '\u{399}', '\u{0}']), + ('\u{1faf}', ['\u{1f6f}', '\u{399}', '\u{0}']), + ('\u{1fb0}', ['\u{1fb8}', '\u{0}', '\u{0}']), ('\u{1fb1}', ['\u{1fb9}', '\u{0}', '\u{0}']), + ('\u{1fb2}', ['\u{1fba}', '\u{399}', '\u{0}']), + ('\u{1fb3}', ['\u{391}', '\u{399}', '\u{0}']), + ('\u{1fb4}', ['\u{386}', '\u{399}', '\u{0}']), + ('\u{1fb6}', ['\u{391}', '\u{342}', '\u{0}']), + ('\u{1fb7}', ['\u{391}', '\u{342}', '\u{399}']), + ('\u{1fbc}', ['\u{391}', '\u{399}', '\u{0}']), ('\u{1fbe}', ['\u{399}', '\u{0}', '\u{0}']), + ('\u{1fc2}', ['\u{1fca}', '\u{399}', '\u{0}']), + ('\u{1fc3}', ['\u{397}', '\u{399}', '\u{0}']), + ('\u{1fc4}', ['\u{389}', '\u{399}', '\u{0}']), + ('\u{1fc6}', ['\u{397}', '\u{342}', '\u{0}']), + ('\u{1fc7}', ['\u{397}', '\u{342}', '\u{399}']), + ('\u{1fcc}', ['\u{397}', '\u{399}', '\u{0}']), ('\u{1fd0}', ['\u{1fd8}', '\u{0}', '\u{0}']), + ('\u{1fd1}', ['\u{1fd9}', '\u{0}', '\u{0}']), + 
('\u{1fd2}', ['\u{399}', '\u{308}', '\u{300}']), + ('\u{1fd3}', ['\u{399}', '\u{308}', '\u{301}']), + ('\u{1fd6}', ['\u{399}', '\u{342}', '\u{0}']), + ('\u{1fd7}', ['\u{399}', '\u{308}', '\u{342}']), + ('\u{1fe0}', ['\u{1fe8}', '\u{0}', '\u{0}']), ('\u{1fe1}', ['\u{1fe9}', '\u{0}', '\u{0}']), + ('\u{1fe2}', ['\u{3a5}', '\u{308}', '\u{300}']), + ('\u{1fe3}', ['\u{3a5}', '\u{308}', '\u{301}']), + ('\u{1fe4}', ['\u{3a1}', '\u{313}', '\u{0}']), ('\u{1fe5}', ['\u{1fec}', '\u{0}', '\u{0}']), + ('\u{1fe6}', ['\u{3a5}', '\u{342}', '\u{0}']), + ('\u{1fe7}', ['\u{3a5}', '\u{308}', '\u{342}']), + ('\u{1ff2}', ['\u{1ffa}', '\u{399}', '\u{0}']), + ('\u{1ff3}', ['\u{3a9}', '\u{399}', '\u{0}']), + ('\u{1ff4}', ['\u{38f}', '\u{399}', '\u{0}']), + ('\u{1ff6}', ['\u{3a9}', '\u{342}', '\u{0}']), + ('\u{1ff7}', ['\u{3a9}', '\u{342}', '\u{399}']), + ('\u{1ffc}', ['\u{3a9}', '\u{399}', '\u{0}']), ('\u{214e}', ['\u{2132}', '\u{0}', '\u{0}']), + ('\u{2170}', ['\u{2160}', '\u{0}', '\u{0}']), ('\u{2171}', ['\u{2161}', '\u{0}', '\u{0}']), + ('\u{2172}', ['\u{2162}', '\u{0}', '\u{0}']), ('\u{2173}', ['\u{2163}', '\u{0}', '\u{0}']), + ('\u{2174}', ['\u{2164}', '\u{0}', '\u{0}']), ('\u{2175}', ['\u{2165}', '\u{0}', '\u{0}']), + ('\u{2176}', ['\u{2166}', '\u{0}', '\u{0}']), ('\u{2177}', ['\u{2167}', '\u{0}', '\u{0}']), + ('\u{2178}', ['\u{2168}', '\u{0}', '\u{0}']), ('\u{2179}', ['\u{2169}', '\u{0}', '\u{0}']), + ('\u{217a}', ['\u{216a}', '\u{0}', '\u{0}']), ('\u{217b}', ['\u{216b}', '\u{0}', '\u{0}']), + ('\u{217c}', ['\u{216c}', '\u{0}', '\u{0}']), ('\u{217d}', ['\u{216d}', '\u{0}', '\u{0}']), + ('\u{217e}', ['\u{216e}', '\u{0}', '\u{0}']), ('\u{217f}', ['\u{216f}', '\u{0}', '\u{0}']), + ('\u{2184}', ['\u{2183}', '\u{0}', '\u{0}']), ('\u{24d0}', ['\u{24b6}', '\u{0}', '\u{0}']), + ('\u{24d1}', ['\u{24b7}', '\u{0}', '\u{0}']), ('\u{24d2}', ['\u{24b8}', '\u{0}', '\u{0}']), + ('\u{24d3}', ['\u{24b9}', '\u{0}', '\u{0}']), ('\u{24d4}', ['\u{24ba}', '\u{0}', '\u{0}']), + ('\u{24d5}', ['\u{24bb}', '\u{0}', '\u{0}']), ('\u{24d6}', ['\u{24bc}', '\u{0}', '\u{0}']), + ('\u{24d7}', ['\u{24bd}', '\u{0}', '\u{0}']), ('\u{24d8}', ['\u{24be}', '\u{0}', '\u{0}']), + ('\u{24d9}', ['\u{24bf}', '\u{0}', '\u{0}']), ('\u{24da}', ['\u{24c0}', '\u{0}', '\u{0}']), + ('\u{24db}', ['\u{24c1}', '\u{0}', '\u{0}']), ('\u{24dc}', ['\u{24c2}', '\u{0}', '\u{0}']), + ('\u{24dd}', ['\u{24c3}', '\u{0}', '\u{0}']), ('\u{24de}', ['\u{24c4}', '\u{0}', '\u{0}']), + ('\u{24df}', ['\u{24c5}', '\u{0}', '\u{0}']), ('\u{24e0}', ['\u{24c6}', '\u{0}', '\u{0}']), + ('\u{24e1}', ['\u{24c7}', '\u{0}', '\u{0}']), ('\u{24e2}', ['\u{24c8}', '\u{0}', '\u{0}']), + ('\u{24e3}', ['\u{24c9}', '\u{0}', '\u{0}']), ('\u{24e4}', ['\u{24ca}', '\u{0}', '\u{0}']), + ('\u{24e5}', ['\u{24cb}', '\u{0}', '\u{0}']), ('\u{24e6}', ['\u{24cc}', '\u{0}', '\u{0}']), + ('\u{24e7}', ['\u{24cd}', '\u{0}', '\u{0}']), ('\u{24e8}', ['\u{24ce}', '\u{0}', '\u{0}']), + ('\u{24e9}', ['\u{24cf}', '\u{0}', '\u{0}']), ('\u{2c30}', ['\u{2c00}', '\u{0}', '\u{0}']), + ('\u{2c31}', ['\u{2c01}', '\u{0}', '\u{0}']), ('\u{2c32}', ['\u{2c02}', '\u{0}', '\u{0}']), + ('\u{2c33}', ['\u{2c03}', '\u{0}', '\u{0}']), ('\u{2c34}', ['\u{2c04}', '\u{0}', '\u{0}']), + ('\u{2c35}', ['\u{2c05}', '\u{0}', '\u{0}']), ('\u{2c36}', ['\u{2c06}', '\u{0}', '\u{0}']), + ('\u{2c37}', ['\u{2c07}', '\u{0}', '\u{0}']), ('\u{2c38}', ['\u{2c08}', '\u{0}', '\u{0}']), + ('\u{2c39}', ['\u{2c09}', '\u{0}', '\u{0}']), ('\u{2c3a}', ['\u{2c0a}', '\u{0}', '\u{0}']), + ('\u{2c3b}', ['\u{2c0b}', '\u{0}', '\u{0}']), ('\u{2c3c}', ['\u{2c0c}', '\u{0}', 
'\u{0}']), + ('\u{2c3d}', ['\u{2c0d}', '\u{0}', '\u{0}']), ('\u{2c3e}', ['\u{2c0e}', '\u{0}', '\u{0}']), + ('\u{2c3f}', ['\u{2c0f}', '\u{0}', '\u{0}']), ('\u{2c40}', ['\u{2c10}', '\u{0}', '\u{0}']), + ('\u{2c41}', ['\u{2c11}', '\u{0}', '\u{0}']), ('\u{2c42}', ['\u{2c12}', '\u{0}', '\u{0}']), + ('\u{2c43}', ['\u{2c13}', '\u{0}', '\u{0}']), ('\u{2c44}', ['\u{2c14}', '\u{0}', '\u{0}']), + ('\u{2c45}', ['\u{2c15}', '\u{0}', '\u{0}']), ('\u{2c46}', ['\u{2c16}', '\u{0}', '\u{0}']), + ('\u{2c47}', ['\u{2c17}', '\u{0}', '\u{0}']), ('\u{2c48}', ['\u{2c18}', '\u{0}', '\u{0}']), + ('\u{2c49}', ['\u{2c19}', '\u{0}', '\u{0}']), ('\u{2c4a}', ['\u{2c1a}', '\u{0}', '\u{0}']), + ('\u{2c4b}', ['\u{2c1b}', '\u{0}', '\u{0}']), ('\u{2c4c}', ['\u{2c1c}', '\u{0}', '\u{0}']), + ('\u{2c4d}', ['\u{2c1d}', '\u{0}', '\u{0}']), ('\u{2c4e}', ['\u{2c1e}', '\u{0}', '\u{0}']), + ('\u{2c4f}', ['\u{2c1f}', '\u{0}', '\u{0}']), ('\u{2c50}', ['\u{2c20}', '\u{0}', '\u{0}']), + ('\u{2c51}', ['\u{2c21}', '\u{0}', '\u{0}']), ('\u{2c52}', ['\u{2c22}', '\u{0}', '\u{0}']), + ('\u{2c53}', ['\u{2c23}', '\u{0}', '\u{0}']), ('\u{2c54}', ['\u{2c24}', '\u{0}', '\u{0}']), + ('\u{2c55}', ['\u{2c25}', '\u{0}', '\u{0}']), ('\u{2c56}', ['\u{2c26}', '\u{0}', '\u{0}']), + ('\u{2c57}', ['\u{2c27}', '\u{0}', '\u{0}']), ('\u{2c58}', ['\u{2c28}', '\u{0}', '\u{0}']), + ('\u{2c59}', ['\u{2c29}', '\u{0}', '\u{0}']), ('\u{2c5a}', ['\u{2c2a}', '\u{0}', '\u{0}']), + ('\u{2c5b}', ['\u{2c2b}', '\u{0}', '\u{0}']), ('\u{2c5c}', ['\u{2c2c}', '\u{0}', '\u{0}']), + ('\u{2c5d}', ['\u{2c2d}', '\u{0}', '\u{0}']), ('\u{2c5e}', ['\u{2c2e}', '\u{0}', '\u{0}']), + ('\u{2c5f}', ['\u{2c2f}', '\u{0}', '\u{0}']), ('\u{2c61}', ['\u{2c60}', '\u{0}', '\u{0}']), + ('\u{2c65}', ['\u{23a}', '\u{0}', '\u{0}']), ('\u{2c66}', ['\u{23e}', '\u{0}', '\u{0}']), + ('\u{2c68}', ['\u{2c67}', '\u{0}', '\u{0}']), ('\u{2c6a}', ['\u{2c69}', '\u{0}', '\u{0}']), + ('\u{2c6c}', ['\u{2c6b}', '\u{0}', '\u{0}']), ('\u{2c73}', ['\u{2c72}', '\u{0}', '\u{0}']), + ('\u{2c76}', ['\u{2c75}', '\u{0}', '\u{0}']), ('\u{2c81}', ['\u{2c80}', '\u{0}', '\u{0}']), + ('\u{2c83}', ['\u{2c82}', '\u{0}', '\u{0}']), ('\u{2c85}', ['\u{2c84}', '\u{0}', '\u{0}']), + ('\u{2c87}', ['\u{2c86}', '\u{0}', '\u{0}']), ('\u{2c89}', ['\u{2c88}', '\u{0}', '\u{0}']), + ('\u{2c8b}', ['\u{2c8a}', '\u{0}', '\u{0}']), ('\u{2c8d}', ['\u{2c8c}', '\u{0}', '\u{0}']), + ('\u{2c8f}', ['\u{2c8e}', '\u{0}', '\u{0}']), ('\u{2c91}', ['\u{2c90}', '\u{0}', '\u{0}']), + ('\u{2c93}', ['\u{2c92}', '\u{0}', '\u{0}']), ('\u{2c95}', ['\u{2c94}', '\u{0}', '\u{0}']), + ('\u{2c97}', ['\u{2c96}', '\u{0}', '\u{0}']), ('\u{2c99}', ['\u{2c98}', '\u{0}', '\u{0}']), + ('\u{2c9b}', ['\u{2c9a}', '\u{0}', '\u{0}']), ('\u{2c9d}', ['\u{2c9c}', '\u{0}', '\u{0}']), + ('\u{2c9f}', ['\u{2c9e}', '\u{0}', '\u{0}']), ('\u{2ca1}', ['\u{2ca0}', '\u{0}', '\u{0}']), + ('\u{2ca3}', ['\u{2ca2}', '\u{0}', '\u{0}']), ('\u{2ca5}', ['\u{2ca4}', '\u{0}', '\u{0}']), + ('\u{2ca7}', ['\u{2ca6}', '\u{0}', '\u{0}']), ('\u{2ca9}', ['\u{2ca8}', '\u{0}', '\u{0}']), + ('\u{2cab}', ['\u{2caa}', '\u{0}', '\u{0}']), ('\u{2cad}', ['\u{2cac}', '\u{0}', '\u{0}']), + ('\u{2caf}', ['\u{2cae}', '\u{0}', '\u{0}']), ('\u{2cb1}', ['\u{2cb0}', '\u{0}', '\u{0}']), + ('\u{2cb3}', ['\u{2cb2}', '\u{0}', '\u{0}']), ('\u{2cb5}', ['\u{2cb4}', '\u{0}', '\u{0}']), + ('\u{2cb7}', ['\u{2cb6}', '\u{0}', '\u{0}']), ('\u{2cb9}', ['\u{2cb8}', '\u{0}', '\u{0}']), + ('\u{2cbb}', ['\u{2cba}', '\u{0}', '\u{0}']), ('\u{2cbd}', ['\u{2cbc}', '\u{0}', '\u{0}']), + ('\u{2cbf}', ['\u{2cbe}', '\u{0}', '\u{0}']), ('\u{2cc1}', 
['\u{2cc0}', '\u{0}', '\u{0}']), + ('\u{2cc3}', ['\u{2cc2}', '\u{0}', '\u{0}']), ('\u{2cc5}', ['\u{2cc4}', '\u{0}', '\u{0}']), + ('\u{2cc7}', ['\u{2cc6}', '\u{0}', '\u{0}']), ('\u{2cc9}', ['\u{2cc8}', '\u{0}', '\u{0}']), + ('\u{2ccb}', ['\u{2cca}', '\u{0}', '\u{0}']), ('\u{2ccd}', ['\u{2ccc}', '\u{0}', '\u{0}']), + ('\u{2ccf}', ['\u{2cce}', '\u{0}', '\u{0}']), ('\u{2cd1}', ['\u{2cd0}', '\u{0}', '\u{0}']), + ('\u{2cd3}', ['\u{2cd2}', '\u{0}', '\u{0}']), ('\u{2cd5}', ['\u{2cd4}', '\u{0}', '\u{0}']), + ('\u{2cd7}', ['\u{2cd6}', '\u{0}', '\u{0}']), ('\u{2cd9}', ['\u{2cd8}', '\u{0}', '\u{0}']), + ('\u{2cdb}', ['\u{2cda}', '\u{0}', '\u{0}']), ('\u{2cdd}', ['\u{2cdc}', '\u{0}', '\u{0}']), + ('\u{2cdf}', ['\u{2cde}', '\u{0}', '\u{0}']), ('\u{2ce1}', ['\u{2ce0}', '\u{0}', '\u{0}']), + ('\u{2ce3}', ['\u{2ce2}', '\u{0}', '\u{0}']), ('\u{2cec}', ['\u{2ceb}', '\u{0}', '\u{0}']), + ('\u{2cee}', ['\u{2ced}', '\u{0}', '\u{0}']), ('\u{2cf3}', ['\u{2cf2}', '\u{0}', '\u{0}']), + ('\u{2d00}', ['\u{10a0}', '\u{0}', '\u{0}']), ('\u{2d01}', ['\u{10a1}', '\u{0}', '\u{0}']), + ('\u{2d02}', ['\u{10a2}', '\u{0}', '\u{0}']), ('\u{2d03}', ['\u{10a3}', '\u{0}', '\u{0}']), + ('\u{2d04}', ['\u{10a4}', '\u{0}', '\u{0}']), ('\u{2d05}', ['\u{10a5}', '\u{0}', '\u{0}']), + ('\u{2d06}', ['\u{10a6}', '\u{0}', '\u{0}']), ('\u{2d07}', ['\u{10a7}', '\u{0}', '\u{0}']), + ('\u{2d08}', ['\u{10a8}', '\u{0}', '\u{0}']), ('\u{2d09}', ['\u{10a9}', '\u{0}', '\u{0}']), + ('\u{2d0a}', ['\u{10aa}', '\u{0}', '\u{0}']), ('\u{2d0b}', ['\u{10ab}', '\u{0}', '\u{0}']), + ('\u{2d0c}', ['\u{10ac}', '\u{0}', '\u{0}']), ('\u{2d0d}', ['\u{10ad}', '\u{0}', '\u{0}']), + ('\u{2d0e}', ['\u{10ae}', '\u{0}', '\u{0}']), ('\u{2d0f}', ['\u{10af}', '\u{0}', '\u{0}']), + ('\u{2d10}', ['\u{10b0}', '\u{0}', '\u{0}']), ('\u{2d11}', ['\u{10b1}', '\u{0}', '\u{0}']), + ('\u{2d12}', ['\u{10b2}', '\u{0}', '\u{0}']), ('\u{2d13}', ['\u{10b3}', '\u{0}', '\u{0}']), + ('\u{2d14}', ['\u{10b4}', '\u{0}', '\u{0}']), ('\u{2d15}', ['\u{10b5}', '\u{0}', '\u{0}']), + ('\u{2d16}', ['\u{10b6}', '\u{0}', '\u{0}']), ('\u{2d17}', ['\u{10b7}', '\u{0}', '\u{0}']), + ('\u{2d18}', ['\u{10b8}', '\u{0}', '\u{0}']), ('\u{2d19}', ['\u{10b9}', '\u{0}', '\u{0}']), + ('\u{2d1a}', ['\u{10ba}', '\u{0}', '\u{0}']), ('\u{2d1b}', ['\u{10bb}', '\u{0}', '\u{0}']), + ('\u{2d1c}', ['\u{10bc}', '\u{0}', '\u{0}']), ('\u{2d1d}', ['\u{10bd}', '\u{0}', '\u{0}']), + ('\u{2d1e}', ['\u{10be}', '\u{0}', '\u{0}']), ('\u{2d1f}', ['\u{10bf}', '\u{0}', '\u{0}']), + ('\u{2d20}', ['\u{10c0}', '\u{0}', '\u{0}']), ('\u{2d21}', ['\u{10c1}', '\u{0}', '\u{0}']), + ('\u{2d22}', ['\u{10c2}', '\u{0}', '\u{0}']), ('\u{2d23}', ['\u{10c3}', '\u{0}', '\u{0}']), + ('\u{2d24}', ['\u{10c4}', '\u{0}', '\u{0}']), ('\u{2d25}', ['\u{10c5}', '\u{0}', '\u{0}']), + ('\u{2d27}', ['\u{10c7}', '\u{0}', '\u{0}']), ('\u{2d2d}', ['\u{10cd}', '\u{0}', '\u{0}']), + ('\u{a641}', ['\u{a640}', '\u{0}', '\u{0}']), ('\u{a643}', ['\u{a642}', '\u{0}', '\u{0}']), + ('\u{a645}', ['\u{a644}', '\u{0}', '\u{0}']), ('\u{a647}', ['\u{a646}', '\u{0}', '\u{0}']), + ('\u{a649}', ['\u{a648}', '\u{0}', '\u{0}']), ('\u{a64b}', ['\u{a64a}', '\u{0}', '\u{0}']), + ('\u{a64d}', ['\u{a64c}', '\u{0}', '\u{0}']), ('\u{a64f}', ['\u{a64e}', '\u{0}', '\u{0}']), + ('\u{a651}', ['\u{a650}', '\u{0}', '\u{0}']), ('\u{a653}', ['\u{a652}', '\u{0}', '\u{0}']), + ('\u{a655}', ['\u{a654}', '\u{0}', '\u{0}']), ('\u{a657}', ['\u{a656}', '\u{0}', '\u{0}']), + ('\u{a659}', ['\u{a658}', '\u{0}', '\u{0}']), ('\u{a65b}', ['\u{a65a}', '\u{0}', '\u{0}']), + ('\u{a65d}', ['\u{a65c}', '\u{0}', 
'\u{0}']), ('\u{a65f}', ['\u{a65e}', '\u{0}', '\u{0}']), + ('\u{a661}', ['\u{a660}', '\u{0}', '\u{0}']), ('\u{a663}', ['\u{a662}', '\u{0}', '\u{0}']), + ('\u{a665}', ['\u{a664}', '\u{0}', '\u{0}']), ('\u{a667}', ['\u{a666}', '\u{0}', '\u{0}']), + ('\u{a669}', ['\u{a668}', '\u{0}', '\u{0}']), ('\u{a66b}', ['\u{a66a}', '\u{0}', '\u{0}']), + ('\u{a66d}', ['\u{a66c}', '\u{0}', '\u{0}']), ('\u{a681}', ['\u{a680}', '\u{0}', '\u{0}']), + ('\u{a683}', ['\u{a682}', '\u{0}', '\u{0}']), ('\u{a685}', ['\u{a684}', '\u{0}', '\u{0}']), + ('\u{a687}', ['\u{a686}', '\u{0}', '\u{0}']), ('\u{a689}', ['\u{a688}', '\u{0}', '\u{0}']), + ('\u{a68b}', ['\u{a68a}', '\u{0}', '\u{0}']), ('\u{a68d}', ['\u{a68c}', '\u{0}', '\u{0}']), + ('\u{a68f}', ['\u{a68e}', '\u{0}', '\u{0}']), ('\u{a691}', ['\u{a690}', '\u{0}', '\u{0}']), + ('\u{a693}', ['\u{a692}', '\u{0}', '\u{0}']), ('\u{a695}', ['\u{a694}', '\u{0}', '\u{0}']), + ('\u{a697}', ['\u{a696}', '\u{0}', '\u{0}']), ('\u{a699}', ['\u{a698}', '\u{0}', '\u{0}']), + ('\u{a69b}', ['\u{a69a}', '\u{0}', '\u{0}']), ('\u{a723}', ['\u{a722}', '\u{0}', '\u{0}']), + ('\u{a725}', ['\u{a724}', '\u{0}', '\u{0}']), ('\u{a727}', ['\u{a726}', '\u{0}', '\u{0}']), + ('\u{a729}', ['\u{a728}', '\u{0}', '\u{0}']), ('\u{a72b}', ['\u{a72a}', '\u{0}', '\u{0}']), + ('\u{a72d}', ['\u{a72c}', '\u{0}', '\u{0}']), ('\u{a72f}', ['\u{a72e}', '\u{0}', '\u{0}']), + ('\u{a733}', ['\u{a732}', '\u{0}', '\u{0}']), ('\u{a735}', ['\u{a734}', '\u{0}', '\u{0}']), + ('\u{a737}', ['\u{a736}', '\u{0}', '\u{0}']), ('\u{a739}', ['\u{a738}', '\u{0}', '\u{0}']), + ('\u{a73b}', ['\u{a73a}', '\u{0}', '\u{0}']), ('\u{a73d}', ['\u{a73c}', '\u{0}', '\u{0}']), + ('\u{a73f}', ['\u{a73e}', '\u{0}', '\u{0}']), ('\u{a741}', ['\u{a740}', '\u{0}', '\u{0}']), + ('\u{a743}', ['\u{a742}', '\u{0}', '\u{0}']), ('\u{a745}', ['\u{a744}', '\u{0}', '\u{0}']), + ('\u{a747}', ['\u{a746}', '\u{0}', '\u{0}']), ('\u{a749}', ['\u{a748}', '\u{0}', '\u{0}']), + ('\u{a74b}', ['\u{a74a}', '\u{0}', '\u{0}']), ('\u{a74d}', ['\u{a74c}', '\u{0}', '\u{0}']), + ('\u{a74f}', ['\u{a74e}', '\u{0}', '\u{0}']), ('\u{a751}', ['\u{a750}', '\u{0}', '\u{0}']), + ('\u{a753}', ['\u{a752}', '\u{0}', '\u{0}']), ('\u{a755}', ['\u{a754}', '\u{0}', '\u{0}']), + ('\u{a757}', ['\u{a756}', '\u{0}', '\u{0}']), ('\u{a759}', ['\u{a758}', '\u{0}', '\u{0}']), + ('\u{a75b}', ['\u{a75a}', '\u{0}', '\u{0}']), ('\u{a75d}', ['\u{a75c}', '\u{0}', '\u{0}']), + ('\u{a75f}', ['\u{a75e}', '\u{0}', '\u{0}']), ('\u{a761}', ['\u{a760}', '\u{0}', '\u{0}']), + ('\u{a763}', ['\u{a762}', '\u{0}', '\u{0}']), ('\u{a765}', ['\u{a764}', '\u{0}', '\u{0}']), + ('\u{a767}', ['\u{a766}', '\u{0}', '\u{0}']), ('\u{a769}', ['\u{a768}', '\u{0}', '\u{0}']), + ('\u{a76b}', ['\u{a76a}', '\u{0}', '\u{0}']), ('\u{a76d}', ['\u{a76c}', '\u{0}', '\u{0}']), + ('\u{a76f}', ['\u{a76e}', '\u{0}', '\u{0}']), ('\u{a77a}', ['\u{a779}', '\u{0}', '\u{0}']), + ('\u{a77c}', ['\u{a77b}', '\u{0}', '\u{0}']), ('\u{a77f}', ['\u{a77e}', '\u{0}', '\u{0}']), + ('\u{a781}', ['\u{a780}', '\u{0}', '\u{0}']), ('\u{a783}', ['\u{a782}', '\u{0}', '\u{0}']), + ('\u{a785}', ['\u{a784}', '\u{0}', '\u{0}']), ('\u{a787}', ['\u{a786}', '\u{0}', '\u{0}']), + ('\u{a78c}', ['\u{a78b}', '\u{0}', '\u{0}']), ('\u{a791}', ['\u{a790}', '\u{0}', '\u{0}']), + ('\u{a793}', ['\u{a792}', '\u{0}', '\u{0}']), ('\u{a794}', ['\u{a7c4}', '\u{0}', '\u{0}']), + ('\u{a797}', ['\u{a796}', '\u{0}', '\u{0}']), ('\u{a799}', ['\u{a798}', '\u{0}', '\u{0}']), + ('\u{a79b}', ['\u{a79a}', '\u{0}', '\u{0}']), ('\u{a79d}', ['\u{a79c}', '\u{0}', '\u{0}']), + ('\u{a79f}', 
['\u{a79e}', '\u{0}', '\u{0}']), ('\u{a7a1}', ['\u{a7a0}', '\u{0}', '\u{0}']), + ('\u{a7a3}', ['\u{a7a2}', '\u{0}', '\u{0}']), ('\u{a7a5}', ['\u{a7a4}', '\u{0}', '\u{0}']), + ('\u{a7a7}', ['\u{a7a6}', '\u{0}', '\u{0}']), ('\u{a7a9}', ['\u{a7a8}', '\u{0}', '\u{0}']), + ('\u{a7b5}', ['\u{a7b4}', '\u{0}', '\u{0}']), ('\u{a7b7}', ['\u{a7b6}', '\u{0}', '\u{0}']), + ('\u{a7b9}', ['\u{a7b8}', '\u{0}', '\u{0}']), ('\u{a7bb}', ['\u{a7ba}', '\u{0}', '\u{0}']), + ('\u{a7bd}', ['\u{a7bc}', '\u{0}', '\u{0}']), ('\u{a7bf}', ['\u{a7be}', '\u{0}', '\u{0}']), + ('\u{a7c1}', ['\u{a7c0}', '\u{0}', '\u{0}']), ('\u{a7c3}', ['\u{a7c2}', '\u{0}', '\u{0}']), + ('\u{a7c8}', ['\u{a7c7}', '\u{0}', '\u{0}']), ('\u{a7ca}', ['\u{a7c9}', '\u{0}', '\u{0}']), + ('\u{a7cd}', ['\u{a7cc}', '\u{0}', '\u{0}']), ('\u{a7cf}', ['\u{a7ce}', '\u{0}', '\u{0}']), + ('\u{a7d1}', ['\u{a7d0}', '\u{0}', '\u{0}']), ('\u{a7d3}', ['\u{a7d2}', '\u{0}', '\u{0}']), + ('\u{a7d5}', ['\u{a7d4}', '\u{0}', '\u{0}']), ('\u{a7d7}', ['\u{a7d6}', '\u{0}', '\u{0}']), + ('\u{a7d9}', ['\u{a7d8}', '\u{0}', '\u{0}']), ('\u{a7db}', ['\u{a7da}', '\u{0}', '\u{0}']), + ('\u{a7f6}', ['\u{a7f5}', '\u{0}', '\u{0}']), ('\u{ab53}', ['\u{a7b3}', '\u{0}', '\u{0}']), + ('\u{ab70}', ['\u{13a0}', '\u{0}', '\u{0}']), ('\u{ab71}', ['\u{13a1}', '\u{0}', '\u{0}']), + ('\u{ab72}', ['\u{13a2}', '\u{0}', '\u{0}']), ('\u{ab73}', ['\u{13a3}', '\u{0}', '\u{0}']), + ('\u{ab74}', ['\u{13a4}', '\u{0}', '\u{0}']), ('\u{ab75}', ['\u{13a5}', '\u{0}', '\u{0}']), + ('\u{ab76}', ['\u{13a6}', '\u{0}', '\u{0}']), ('\u{ab77}', ['\u{13a7}', '\u{0}', '\u{0}']), + ('\u{ab78}', ['\u{13a8}', '\u{0}', '\u{0}']), ('\u{ab79}', ['\u{13a9}', '\u{0}', '\u{0}']), + ('\u{ab7a}', ['\u{13aa}', '\u{0}', '\u{0}']), ('\u{ab7b}', ['\u{13ab}', '\u{0}', '\u{0}']), + ('\u{ab7c}', ['\u{13ac}', '\u{0}', '\u{0}']), ('\u{ab7d}', ['\u{13ad}', '\u{0}', '\u{0}']), + ('\u{ab7e}', ['\u{13ae}', '\u{0}', '\u{0}']), ('\u{ab7f}', ['\u{13af}', '\u{0}', '\u{0}']), + ('\u{ab80}', ['\u{13b0}', '\u{0}', '\u{0}']), ('\u{ab81}', ['\u{13b1}', '\u{0}', '\u{0}']), + ('\u{ab82}', ['\u{13b2}', '\u{0}', '\u{0}']), ('\u{ab83}', ['\u{13b3}', '\u{0}', '\u{0}']), + ('\u{ab84}', ['\u{13b4}', '\u{0}', '\u{0}']), ('\u{ab85}', ['\u{13b5}', '\u{0}', '\u{0}']), + ('\u{ab86}', ['\u{13b6}', '\u{0}', '\u{0}']), ('\u{ab87}', ['\u{13b7}', '\u{0}', '\u{0}']), + ('\u{ab88}', ['\u{13b8}', '\u{0}', '\u{0}']), ('\u{ab89}', ['\u{13b9}', '\u{0}', '\u{0}']), + ('\u{ab8a}', ['\u{13ba}', '\u{0}', '\u{0}']), ('\u{ab8b}', ['\u{13bb}', '\u{0}', '\u{0}']), + ('\u{ab8c}', ['\u{13bc}', '\u{0}', '\u{0}']), ('\u{ab8d}', ['\u{13bd}', '\u{0}', '\u{0}']), + ('\u{ab8e}', ['\u{13be}', '\u{0}', '\u{0}']), ('\u{ab8f}', ['\u{13bf}', '\u{0}', '\u{0}']), + ('\u{ab90}', ['\u{13c0}', '\u{0}', '\u{0}']), ('\u{ab91}', ['\u{13c1}', '\u{0}', '\u{0}']), + ('\u{ab92}', ['\u{13c2}', '\u{0}', '\u{0}']), ('\u{ab93}', ['\u{13c3}', '\u{0}', '\u{0}']), + ('\u{ab94}', ['\u{13c4}', '\u{0}', '\u{0}']), ('\u{ab95}', ['\u{13c5}', '\u{0}', '\u{0}']), + ('\u{ab96}', ['\u{13c6}', '\u{0}', '\u{0}']), ('\u{ab97}', ['\u{13c7}', '\u{0}', '\u{0}']), + ('\u{ab98}', ['\u{13c8}', '\u{0}', '\u{0}']), ('\u{ab99}', ['\u{13c9}', '\u{0}', '\u{0}']), + ('\u{ab9a}', ['\u{13ca}', '\u{0}', '\u{0}']), ('\u{ab9b}', ['\u{13cb}', '\u{0}', '\u{0}']), + ('\u{ab9c}', ['\u{13cc}', '\u{0}', '\u{0}']), ('\u{ab9d}', ['\u{13cd}', '\u{0}', '\u{0}']), + ('\u{ab9e}', ['\u{13ce}', '\u{0}', '\u{0}']), ('\u{ab9f}', ['\u{13cf}', '\u{0}', '\u{0}']), + ('\u{aba0}', ['\u{13d0}', '\u{0}', '\u{0}']), ('\u{aba1}', ['\u{13d1}', '\u{0}', 
'\u{0}']), + ('\u{aba2}', ['\u{13d2}', '\u{0}', '\u{0}']), ('\u{aba3}', ['\u{13d3}', '\u{0}', '\u{0}']), + ('\u{aba4}', ['\u{13d4}', '\u{0}', '\u{0}']), ('\u{aba5}', ['\u{13d5}', '\u{0}', '\u{0}']), + ('\u{aba6}', ['\u{13d6}', '\u{0}', '\u{0}']), ('\u{aba7}', ['\u{13d7}', '\u{0}', '\u{0}']), + ('\u{aba8}', ['\u{13d8}', '\u{0}', '\u{0}']), ('\u{aba9}', ['\u{13d9}', '\u{0}', '\u{0}']), + ('\u{abaa}', ['\u{13da}', '\u{0}', '\u{0}']), ('\u{abab}', ['\u{13db}', '\u{0}', '\u{0}']), + ('\u{abac}', ['\u{13dc}', '\u{0}', '\u{0}']), ('\u{abad}', ['\u{13dd}', '\u{0}', '\u{0}']), + ('\u{abae}', ['\u{13de}', '\u{0}', '\u{0}']), ('\u{abaf}', ['\u{13df}', '\u{0}', '\u{0}']), + ('\u{abb0}', ['\u{13e0}', '\u{0}', '\u{0}']), ('\u{abb1}', ['\u{13e1}', '\u{0}', '\u{0}']), + ('\u{abb2}', ['\u{13e2}', '\u{0}', '\u{0}']), ('\u{abb3}', ['\u{13e3}', '\u{0}', '\u{0}']), + ('\u{abb4}', ['\u{13e4}', '\u{0}', '\u{0}']), ('\u{abb5}', ['\u{13e5}', '\u{0}', '\u{0}']), + ('\u{abb6}', ['\u{13e6}', '\u{0}', '\u{0}']), ('\u{abb7}', ['\u{13e7}', '\u{0}', '\u{0}']), + ('\u{abb8}', ['\u{13e8}', '\u{0}', '\u{0}']), ('\u{abb9}', ['\u{13e9}', '\u{0}', '\u{0}']), + ('\u{abba}', ['\u{13ea}', '\u{0}', '\u{0}']), ('\u{abbb}', ['\u{13eb}', '\u{0}', '\u{0}']), + ('\u{abbc}', ['\u{13ec}', '\u{0}', '\u{0}']), ('\u{abbd}', ['\u{13ed}', '\u{0}', '\u{0}']), + ('\u{abbe}', ['\u{13ee}', '\u{0}', '\u{0}']), ('\u{abbf}', ['\u{13ef}', '\u{0}', '\u{0}']), + ('\u{fb00}', ['\u{46}', '\u{46}', '\u{0}']), ('\u{fb01}', ['\u{46}', '\u{49}', '\u{0}']), + ('\u{fb02}', ['\u{46}', '\u{4c}', '\u{0}']), ('\u{fb03}', ['\u{46}', '\u{46}', '\u{49}']), + ('\u{fb04}', ['\u{46}', '\u{46}', '\u{4c}']), ('\u{fb05}', ['\u{53}', '\u{54}', '\u{0}']), + ('\u{fb06}', ['\u{53}', '\u{54}', '\u{0}']), ('\u{fb13}', ['\u{544}', '\u{546}', '\u{0}']), + ('\u{fb14}', ['\u{544}', '\u{535}', '\u{0}']), + ('\u{fb15}', ['\u{544}', '\u{53b}', '\u{0}']), + ('\u{fb16}', ['\u{54e}', '\u{546}', '\u{0}']), + ('\u{fb17}', ['\u{544}', '\u{53d}', '\u{0}']), ('\u{ff41}', ['\u{ff21}', '\u{0}', '\u{0}']), + ('\u{ff42}', ['\u{ff22}', '\u{0}', '\u{0}']), ('\u{ff43}', ['\u{ff23}', '\u{0}', '\u{0}']), + ('\u{ff44}', ['\u{ff24}', '\u{0}', '\u{0}']), ('\u{ff45}', ['\u{ff25}', '\u{0}', '\u{0}']), + ('\u{ff46}', ['\u{ff26}', '\u{0}', '\u{0}']), ('\u{ff47}', ['\u{ff27}', '\u{0}', '\u{0}']), + ('\u{ff48}', ['\u{ff28}', '\u{0}', '\u{0}']), ('\u{ff49}', ['\u{ff29}', '\u{0}', '\u{0}']), + ('\u{ff4a}', ['\u{ff2a}', '\u{0}', '\u{0}']), ('\u{ff4b}', ['\u{ff2b}', '\u{0}', '\u{0}']), + ('\u{ff4c}', ['\u{ff2c}', '\u{0}', '\u{0}']), ('\u{ff4d}', ['\u{ff2d}', '\u{0}', '\u{0}']), + ('\u{ff4e}', ['\u{ff2e}', '\u{0}', '\u{0}']), ('\u{ff4f}', ['\u{ff2f}', '\u{0}', '\u{0}']), + ('\u{ff50}', ['\u{ff30}', '\u{0}', '\u{0}']), ('\u{ff51}', ['\u{ff31}', '\u{0}', '\u{0}']), + ('\u{ff52}', ['\u{ff32}', '\u{0}', '\u{0}']), ('\u{ff53}', ['\u{ff33}', '\u{0}', '\u{0}']), + ('\u{ff54}', ['\u{ff34}', '\u{0}', '\u{0}']), ('\u{ff55}', ['\u{ff35}', '\u{0}', '\u{0}']), + ('\u{ff56}', ['\u{ff36}', '\u{0}', '\u{0}']), ('\u{ff57}', ['\u{ff37}', '\u{0}', '\u{0}']), + ('\u{ff58}', ['\u{ff38}', '\u{0}', '\u{0}']), ('\u{ff59}', ['\u{ff39}', '\u{0}', '\u{0}']), + ('\u{ff5a}', ['\u{ff3a}', '\u{0}', '\u{0}']), + ('\u{10428}', ['\u{10400}', '\u{0}', '\u{0}']), + ('\u{10429}', ['\u{10401}', '\u{0}', '\u{0}']), + ('\u{1042a}', ['\u{10402}', '\u{0}', '\u{0}']), + ('\u{1042b}', ['\u{10403}', '\u{0}', '\u{0}']), + ('\u{1042c}', ['\u{10404}', '\u{0}', '\u{0}']), + ('\u{1042d}', ['\u{10405}', '\u{0}', '\u{0}']), + ('\u{1042e}', ['\u{10406}', '\u{0}', 
'\u{0}']), + ('\u{1042f}', ['\u{10407}', '\u{0}', '\u{0}']), + ('\u{10430}', ['\u{10408}', '\u{0}', '\u{0}']), + ('\u{10431}', ['\u{10409}', '\u{0}', '\u{0}']), + ('\u{10432}', ['\u{1040a}', '\u{0}', '\u{0}']), + ('\u{10433}', ['\u{1040b}', '\u{0}', '\u{0}']), + ('\u{10434}', ['\u{1040c}', '\u{0}', '\u{0}']), + ('\u{10435}', ['\u{1040d}', '\u{0}', '\u{0}']), + ('\u{10436}', ['\u{1040e}', '\u{0}', '\u{0}']), + ('\u{10437}', ['\u{1040f}', '\u{0}', '\u{0}']), + ('\u{10438}', ['\u{10410}', '\u{0}', '\u{0}']), + ('\u{10439}', ['\u{10411}', '\u{0}', '\u{0}']), + ('\u{1043a}', ['\u{10412}', '\u{0}', '\u{0}']), + ('\u{1043b}', ['\u{10413}', '\u{0}', '\u{0}']), + ('\u{1043c}', ['\u{10414}', '\u{0}', '\u{0}']), + ('\u{1043d}', ['\u{10415}', '\u{0}', '\u{0}']), + ('\u{1043e}', ['\u{10416}', '\u{0}', '\u{0}']), + ('\u{1043f}', ['\u{10417}', '\u{0}', '\u{0}']), + ('\u{10440}', ['\u{10418}', '\u{0}', '\u{0}']), + ('\u{10441}', ['\u{10419}', '\u{0}', '\u{0}']), + ('\u{10442}', ['\u{1041a}', '\u{0}', '\u{0}']), + ('\u{10443}', ['\u{1041b}', '\u{0}', '\u{0}']), + ('\u{10444}', ['\u{1041c}', '\u{0}', '\u{0}']), + ('\u{10445}', ['\u{1041d}', '\u{0}', '\u{0}']), + ('\u{10446}', ['\u{1041e}', '\u{0}', '\u{0}']), + ('\u{10447}', ['\u{1041f}', '\u{0}', '\u{0}']), + ('\u{10448}', ['\u{10420}', '\u{0}', '\u{0}']), + ('\u{10449}', ['\u{10421}', '\u{0}', '\u{0}']), + ('\u{1044a}', ['\u{10422}', '\u{0}', '\u{0}']), + ('\u{1044b}', ['\u{10423}', '\u{0}', '\u{0}']), + ('\u{1044c}', ['\u{10424}', '\u{0}', '\u{0}']), + ('\u{1044d}', ['\u{10425}', '\u{0}', '\u{0}']), + ('\u{1044e}', ['\u{10426}', '\u{0}', '\u{0}']), + ('\u{1044f}', ['\u{10427}', '\u{0}', '\u{0}']), + ('\u{104d8}', ['\u{104b0}', '\u{0}', '\u{0}']), + ('\u{104d9}', ['\u{104b1}', '\u{0}', '\u{0}']), + ('\u{104da}', ['\u{104b2}', '\u{0}', '\u{0}']), + ('\u{104db}', ['\u{104b3}', '\u{0}', '\u{0}']), + ('\u{104dc}', ['\u{104b4}', '\u{0}', '\u{0}']), + ('\u{104dd}', ['\u{104b5}', '\u{0}', '\u{0}']), + ('\u{104de}', ['\u{104b6}', '\u{0}', '\u{0}']), + ('\u{104df}', ['\u{104b7}', '\u{0}', '\u{0}']), + ('\u{104e0}', ['\u{104b8}', '\u{0}', '\u{0}']), + ('\u{104e1}', ['\u{104b9}', '\u{0}', '\u{0}']), + ('\u{104e2}', ['\u{104ba}', '\u{0}', '\u{0}']), + ('\u{104e3}', ['\u{104bb}', '\u{0}', '\u{0}']), + ('\u{104e4}', ['\u{104bc}', '\u{0}', '\u{0}']), + ('\u{104e5}', ['\u{104bd}', '\u{0}', '\u{0}']), + ('\u{104e6}', ['\u{104be}', '\u{0}', '\u{0}']), + ('\u{104e7}', ['\u{104bf}', '\u{0}', '\u{0}']), + ('\u{104e8}', ['\u{104c0}', '\u{0}', '\u{0}']), + ('\u{104e9}', ['\u{104c1}', '\u{0}', '\u{0}']), + ('\u{104ea}', ['\u{104c2}', '\u{0}', '\u{0}']), + ('\u{104eb}', ['\u{104c3}', '\u{0}', '\u{0}']), + ('\u{104ec}', ['\u{104c4}', '\u{0}', '\u{0}']), + ('\u{104ed}', ['\u{104c5}', '\u{0}', '\u{0}']), + ('\u{104ee}', ['\u{104c6}', '\u{0}', '\u{0}']), + ('\u{104ef}', ['\u{104c7}', '\u{0}', '\u{0}']), + ('\u{104f0}', ['\u{104c8}', '\u{0}', '\u{0}']), + ('\u{104f1}', ['\u{104c9}', '\u{0}', '\u{0}']), + ('\u{104f2}', ['\u{104ca}', '\u{0}', '\u{0}']), + ('\u{104f3}', ['\u{104cb}', '\u{0}', '\u{0}']), + ('\u{104f4}', ['\u{104cc}', '\u{0}', '\u{0}']), + ('\u{104f5}', ['\u{104cd}', '\u{0}', '\u{0}']), + ('\u{104f6}', ['\u{104ce}', '\u{0}', '\u{0}']), + ('\u{104f7}', ['\u{104cf}', '\u{0}', '\u{0}']), + ('\u{104f8}', ['\u{104d0}', '\u{0}', '\u{0}']), + ('\u{104f9}', ['\u{104d1}', '\u{0}', '\u{0}']), + ('\u{104fa}', ['\u{104d2}', '\u{0}', '\u{0}']), + ('\u{104fb}', ['\u{104d3}', '\u{0}', '\u{0}']), + ('\u{10597}', ['\u{10570}', '\u{0}', '\u{0}']), + ('\u{10598}', ['\u{10571}', '\u{0}', 
'\u{0}']), + ('\u{10599}', ['\u{10572}', '\u{0}', '\u{0}']), + ('\u{1059a}', ['\u{10573}', '\u{0}', '\u{0}']), + ('\u{1059b}', ['\u{10574}', '\u{0}', '\u{0}']), + ('\u{1059c}', ['\u{10575}', '\u{0}', '\u{0}']), + ('\u{1059d}', ['\u{10576}', '\u{0}', '\u{0}']), + ('\u{1059e}', ['\u{10577}', '\u{0}', '\u{0}']), + ('\u{1059f}', ['\u{10578}', '\u{0}', '\u{0}']), + ('\u{105a0}', ['\u{10579}', '\u{0}', '\u{0}']), + ('\u{105a1}', ['\u{1057a}', '\u{0}', '\u{0}']), + ('\u{105a3}', ['\u{1057c}', '\u{0}', '\u{0}']), + ('\u{105a4}', ['\u{1057d}', '\u{0}', '\u{0}']), + ('\u{105a5}', ['\u{1057e}', '\u{0}', '\u{0}']), + ('\u{105a6}', ['\u{1057f}', '\u{0}', '\u{0}']), + ('\u{105a7}', ['\u{10580}', '\u{0}', '\u{0}']), + ('\u{105a8}', ['\u{10581}', '\u{0}', '\u{0}']), + ('\u{105a9}', ['\u{10582}', '\u{0}', '\u{0}']), + ('\u{105aa}', ['\u{10583}', '\u{0}', '\u{0}']), + ('\u{105ab}', ['\u{10584}', '\u{0}', '\u{0}']), + ('\u{105ac}', ['\u{10585}', '\u{0}', '\u{0}']), + ('\u{105ad}', ['\u{10586}', '\u{0}', '\u{0}']), + ('\u{105ae}', ['\u{10587}', '\u{0}', '\u{0}']), + ('\u{105af}', ['\u{10588}', '\u{0}', '\u{0}']), + ('\u{105b0}', ['\u{10589}', '\u{0}', '\u{0}']), + ('\u{105b1}', ['\u{1058a}', '\u{0}', '\u{0}']), + ('\u{105b3}', ['\u{1058c}', '\u{0}', '\u{0}']), + ('\u{105b4}', ['\u{1058d}', '\u{0}', '\u{0}']), + ('\u{105b5}', ['\u{1058e}', '\u{0}', '\u{0}']), + ('\u{105b6}', ['\u{1058f}', '\u{0}', '\u{0}']), + ('\u{105b7}', ['\u{10590}', '\u{0}', '\u{0}']), + ('\u{105b8}', ['\u{10591}', '\u{0}', '\u{0}']), + ('\u{105b9}', ['\u{10592}', '\u{0}', '\u{0}']), + ('\u{105bb}', ['\u{10594}', '\u{0}', '\u{0}']), + ('\u{105bc}', ['\u{10595}', '\u{0}', '\u{0}']), + ('\u{10cc0}', ['\u{10c80}', '\u{0}', '\u{0}']), + ('\u{10cc1}', ['\u{10c81}', '\u{0}', '\u{0}']), + ('\u{10cc2}', ['\u{10c82}', '\u{0}', '\u{0}']), + ('\u{10cc3}', ['\u{10c83}', '\u{0}', '\u{0}']), + ('\u{10cc4}', ['\u{10c84}', '\u{0}', '\u{0}']), + ('\u{10cc5}', ['\u{10c85}', '\u{0}', '\u{0}']), + ('\u{10cc6}', ['\u{10c86}', '\u{0}', '\u{0}']), + ('\u{10cc7}', ['\u{10c87}', '\u{0}', '\u{0}']), + ('\u{10cc8}', ['\u{10c88}', '\u{0}', '\u{0}']), + ('\u{10cc9}', ['\u{10c89}', '\u{0}', '\u{0}']), + ('\u{10cca}', ['\u{10c8a}', '\u{0}', '\u{0}']), + ('\u{10ccb}', ['\u{10c8b}', '\u{0}', '\u{0}']), + ('\u{10ccc}', ['\u{10c8c}', '\u{0}', '\u{0}']), + ('\u{10ccd}', ['\u{10c8d}', '\u{0}', '\u{0}']), + ('\u{10cce}', ['\u{10c8e}', '\u{0}', '\u{0}']), + ('\u{10ccf}', ['\u{10c8f}', '\u{0}', '\u{0}']), + ('\u{10cd0}', ['\u{10c90}', '\u{0}', '\u{0}']), + ('\u{10cd1}', ['\u{10c91}', '\u{0}', '\u{0}']), + ('\u{10cd2}', ['\u{10c92}', '\u{0}', '\u{0}']), + ('\u{10cd3}', ['\u{10c93}', '\u{0}', '\u{0}']), + ('\u{10cd4}', ['\u{10c94}', '\u{0}', '\u{0}']), + ('\u{10cd5}', ['\u{10c95}', '\u{0}', '\u{0}']), + ('\u{10cd6}', ['\u{10c96}', '\u{0}', '\u{0}']), + ('\u{10cd7}', ['\u{10c97}', '\u{0}', '\u{0}']), + ('\u{10cd8}', ['\u{10c98}', '\u{0}', '\u{0}']), + ('\u{10cd9}', ['\u{10c99}', '\u{0}', '\u{0}']), + ('\u{10cda}', ['\u{10c9a}', '\u{0}', '\u{0}']), + ('\u{10cdb}', ['\u{10c9b}', '\u{0}', '\u{0}']), + ('\u{10cdc}', ['\u{10c9c}', '\u{0}', '\u{0}']), + ('\u{10cdd}', ['\u{10c9d}', '\u{0}', '\u{0}']), + ('\u{10cde}', ['\u{10c9e}', '\u{0}', '\u{0}']), + ('\u{10cdf}', ['\u{10c9f}', '\u{0}', '\u{0}']), + ('\u{10ce0}', ['\u{10ca0}', '\u{0}', '\u{0}']), + ('\u{10ce1}', ['\u{10ca1}', '\u{0}', '\u{0}']), + ('\u{10ce2}', ['\u{10ca2}', '\u{0}', '\u{0}']), + ('\u{10ce3}', ['\u{10ca3}', '\u{0}', '\u{0}']), + ('\u{10ce4}', ['\u{10ca4}', '\u{0}', '\u{0}']), + ('\u{10ce5}', ['\u{10ca5}', '\u{0}', 
'\u{0}']), + ('\u{10ce6}', ['\u{10ca6}', '\u{0}', '\u{0}']), + ('\u{10ce7}', ['\u{10ca7}', '\u{0}', '\u{0}']), + ('\u{10ce8}', ['\u{10ca8}', '\u{0}', '\u{0}']), + ('\u{10ce9}', ['\u{10ca9}', '\u{0}', '\u{0}']), + ('\u{10cea}', ['\u{10caa}', '\u{0}', '\u{0}']), + ('\u{10ceb}', ['\u{10cab}', '\u{0}', '\u{0}']), + ('\u{10cec}', ['\u{10cac}', '\u{0}', '\u{0}']), + ('\u{10ced}', ['\u{10cad}', '\u{0}', '\u{0}']), + ('\u{10cee}', ['\u{10cae}', '\u{0}', '\u{0}']), + ('\u{10cef}', ['\u{10caf}', '\u{0}', '\u{0}']), + ('\u{10cf0}', ['\u{10cb0}', '\u{0}', '\u{0}']), + ('\u{10cf1}', ['\u{10cb1}', '\u{0}', '\u{0}']), + ('\u{10cf2}', ['\u{10cb2}', '\u{0}', '\u{0}']), + ('\u{10d70}', ['\u{10d50}', '\u{0}', '\u{0}']), + ('\u{10d71}', ['\u{10d51}', '\u{0}', '\u{0}']), + ('\u{10d72}', ['\u{10d52}', '\u{0}', '\u{0}']), + ('\u{10d73}', ['\u{10d53}', '\u{0}', '\u{0}']), + ('\u{10d74}', ['\u{10d54}', '\u{0}', '\u{0}']), + ('\u{10d75}', ['\u{10d55}', '\u{0}', '\u{0}']), + ('\u{10d76}', ['\u{10d56}', '\u{0}', '\u{0}']), + ('\u{10d77}', ['\u{10d57}', '\u{0}', '\u{0}']), + ('\u{10d78}', ['\u{10d58}', '\u{0}', '\u{0}']), + ('\u{10d79}', ['\u{10d59}', '\u{0}', '\u{0}']), + ('\u{10d7a}', ['\u{10d5a}', '\u{0}', '\u{0}']), + ('\u{10d7b}', ['\u{10d5b}', '\u{0}', '\u{0}']), + ('\u{10d7c}', ['\u{10d5c}', '\u{0}', '\u{0}']), + ('\u{10d7d}', ['\u{10d5d}', '\u{0}', '\u{0}']), + ('\u{10d7e}', ['\u{10d5e}', '\u{0}', '\u{0}']), + ('\u{10d7f}', ['\u{10d5f}', '\u{0}', '\u{0}']), + ('\u{10d80}', ['\u{10d60}', '\u{0}', '\u{0}']), + ('\u{10d81}', ['\u{10d61}', '\u{0}', '\u{0}']), + ('\u{10d82}', ['\u{10d62}', '\u{0}', '\u{0}']), + ('\u{10d83}', ['\u{10d63}', '\u{0}', '\u{0}']), + ('\u{10d84}', ['\u{10d64}', '\u{0}', '\u{0}']), + ('\u{10d85}', ['\u{10d65}', '\u{0}', '\u{0}']), + ('\u{118c0}', ['\u{118a0}', '\u{0}', '\u{0}']), + ('\u{118c1}', ['\u{118a1}', '\u{0}', '\u{0}']), + ('\u{118c2}', ['\u{118a2}', '\u{0}', '\u{0}']), + ('\u{118c3}', ['\u{118a3}', '\u{0}', '\u{0}']), + ('\u{118c4}', ['\u{118a4}', '\u{0}', '\u{0}']), + ('\u{118c5}', ['\u{118a5}', '\u{0}', '\u{0}']), + ('\u{118c6}', ['\u{118a6}', '\u{0}', '\u{0}']), + ('\u{118c7}', ['\u{118a7}', '\u{0}', '\u{0}']), + ('\u{118c8}', ['\u{118a8}', '\u{0}', '\u{0}']), + ('\u{118c9}', ['\u{118a9}', '\u{0}', '\u{0}']), + ('\u{118ca}', ['\u{118aa}', '\u{0}', '\u{0}']), + ('\u{118cb}', ['\u{118ab}', '\u{0}', '\u{0}']), + ('\u{118cc}', ['\u{118ac}', '\u{0}', '\u{0}']), + ('\u{118cd}', ['\u{118ad}', '\u{0}', '\u{0}']), + ('\u{118ce}', ['\u{118ae}', '\u{0}', '\u{0}']), + ('\u{118cf}', ['\u{118af}', '\u{0}', '\u{0}']), + ('\u{118d0}', ['\u{118b0}', '\u{0}', '\u{0}']), + ('\u{118d1}', ['\u{118b1}', '\u{0}', '\u{0}']), + ('\u{118d2}', ['\u{118b2}', '\u{0}', '\u{0}']), + ('\u{118d3}', ['\u{118b3}', '\u{0}', '\u{0}']), + ('\u{118d4}', ['\u{118b4}', '\u{0}', '\u{0}']), + ('\u{118d5}', ['\u{118b5}', '\u{0}', '\u{0}']), + ('\u{118d6}', ['\u{118b6}', '\u{0}', '\u{0}']), + ('\u{118d7}', ['\u{118b7}', '\u{0}', '\u{0}']), + ('\u{118d8}', ['\u{118b8}', '\u{0}', '\u{0}']), + ('\u{118d9}', ['\u{118b9}', '\u{0}', '\u{0}']), + ('\u{118da}', ['\u{118ba}', '\u{0}', '\u{0}']), + ('\u{118db}', ['\u{118bb}', '\u{0}', '\u{0}']), + ('\u{118dc}', ['\u{118bc}', '\u{0}', '\u{0}']), + ('\u{118dd}', ['\u{118bd}', '\u{0}', '\u{0}']), + ('\u{118de}', ['\u{118be}', '\u{0}', '\u{0}']), + ('\u{118df}', ['\u{118bf}', '\u{0}', '\u{0}']), + ('\u{16e60}', ['\u{16e40}', '\u{0}', '\u{0}']), + ('\u{16e61}', ['\u{16e41}', '\u{0}', '\u{0}']), + ('\u{16e62}', ['\u{16e42}', '\u{0}', '\u{0}']), + ('\u{16e63}', ['\u{16e43}', '\u{0}', 
'\u{0}']), + ('\u{16e64}', ['\u{16e44}', '\u{0}', '\u{0}']), + ('\u{16e65}', ['\u{16e45}', '\u{0}', '\u{0}']), + ('\u{16e66}', ['\u{16e46}', '\u{0}', '\u{0}']), + ('\u{16e67}', ['\u{16e47}', '\u{0}', '\u{0}']), + ('\u{16e68}', ['\u{16e48}', '\u{0}', '\u{0}']), + ('\u{16e69}', ['\u{16e49}', '\u{0}', '\u{0}']), + ('\u{16e6a}', ['\u{16e4a}', '\u{0}', '\u{0}']), + ('\u{16e6b}', ['\u{16e4b}', '\u{0}', '\u{0}']), + ('\u{16e6c}', ['\u{16e4c}', '\u{0}', '\u{0}']), + ('\u{16e6d}', ['\u{16e4d}', '\u{0}', '\u{0}']), + ('\u{16e6e}', ['\u{16e4e}', '\u{0}', '\u{0}']), + ('\u{16e6f}', ['\u{16e4f}', '\u{0}', '\u{0}']), + ('\u{16e70}', ['\u{16e50}', '\u{0}', '\u{0}']), + ('\u{16e71}', ['\u{16e51}', '\u{0}', '\u{0}']), + ('\u{16e72}', ['\u{16e52}', '\u{0}', '\u{0}']), + ('\u{16e73}', ['\u{16e53}', '\u{0}', '\u{0}']), + ('\u{16e74}', ['\u{16e54}', '\u{0}', '\u{0}']), + ('\u{16e75}', ['\u{16e55}', '\u{0}', '\u{0}']), + ('\u{16e76}', ['\u{16e56}', '\u{0}', '\u{0}']), + ('\u{16e77}', ['\u{16e57}', '\u{0}', '\u{0}']), + ('\u{16e78}', ['\u{16e58}', '\u{0}', '\u{0}']), + ('\u{16e79}', ['\u{16e59}', '\u{0}', '\u{0}']), + ('\u{16e7a}', ['\u{16e5a}', '\u{0}', '\u{0}']), + ('\u{16e7b}', ['\u{16e5b}', '\u{0}', '\u{0}']), + ('\u{16e7c}', ['\u{16e5c}', '\u{0}', '\u{0}']), + ('\u{16e7d}', ['\u{16e5d}', '\u{0}', '\u{0}']), + ('\u{16e7e}', ['\u{16e5e}', '\u{0}', '\u{0}']), + ('\u{16e7f}', ['\u{16e5f}', '\u{0}', '\u{0}']), + ('\u{16ebb}', ['\u{16ea0}', '\u{0}', '\u{0}']), + ('\u{16ebc}', ['\u{16ea1}', '\u{0}', '\u{0}']), + ('\u{16ebd}', ['\u{16ea2}', '\u{0}', '\u{0}']), + ('\u{16ebe}', ['\u{16ea3}', '\u{0}', '\u{0}']), + ('\u{16ebf}', ['\u{16ea4}', '\u{0}', '\u{0}']), + ('\u{16ec0}', ['\u{16ea5}', '\u{0}', '\u{0}']), + ('\u{16ec1}', ['\u{16ea6}', '\u{0}', '\u{0}']), + ('\u{16ec2}', ['\u{16ea7}', '\u{0}', '\u{0}']), + ('\u{16ec3}', ['\u{16ea8}', '\u{0}', '\u{0}']), + ('\u{16ec4}', ['\u{16ea9}', '\u{0}', '\u{0}']), + ('\u{16ec5}', ['\u{16eaa}', '\u{0}', '\u{0}']), + ('\u{16ec6}', ['\u{16eab}', '\u{0}', '\u{0}']), + ('\u{16ec7}', ['\u{16eac}', '\u{0}', '\u{0}']), + ('\u{16ec8}', ['\u{16ead}', '\u{0}', '\u{0}']), + ('\u{16ec9}', ['\u{16eae}', '\u{0}', '\u{0}']), + ('\u{16eca}', ['\u{16eaf}', '\u{0}', '\u{0}']), + ('\u{16ecb}', ['\u{16eb0}', '\u{0}', '\u{0}']), + ('\u{16ecc}', ['\u{16eb1}', '\u{0}', '\u{0}']), + ('\u{16ecd}', ['\u{16eb2}', '\u{0}', '\u{0}']), + ('\u{16ece}', ['\u{16eb3}', '\u{0}', '\u{0}']), + ('\u{16ecf}', ['\u{16eb4}', '\u{0}', '\u{0}']), + ('\u{16ed0}', ['\u{16eb5}', '\u{0}', '\u{0}']), + ('\u{16ed1}', ['\u{16eb6}', '\u{0}', '\u{0}']), + ('\u{16ed2}', ['\u{16eb7}', '\u{0}', '\u{0}']), + ('\u{16ed3}', ['\u{16eb8}', '\u{0}', '\u{0}']), + ('\u{1e922}', ['\u{1e900}', '\u{0}', '\u{0}']), + ('\u{1e923}', ['\u{1e901}', '\u{0}', '\u{0}']), + ('\u{1e924}', ['\u{1e902}', '\u{0}', '\u{0}']), + ('\u{1e925}', ['\u{1e903}', '\u{0}', '\u{0}']), + ('\u{1e926}', ['\u{1e904}', '\u{0}', '\u{0}']), + ('\u{1e927}', ['\u{1e905}', '\u{0}', '\u{0}']), + ('\u{1e928}', ['\u{1e906}', '\u{0}', '\u{0}']), + ('\u{1e929}', ['\u{1e907}', '\u{0}', '\u{0}']), + ('\u{1e92a}', ['\u{1e908}', '\u{0}', '\u{0}']), + ('\u{1e92b}', ['\u{1e909}', '\u{0}', '\u{0}']), + ('\u{1e92c}', ['\u{1e90a}', '\u{0}', '\u{0}']), + ('\u{1e92d}', ['\u{1e90b}', '\u{0}', '\u{0}']), + ('\u{1e92e}', ['\u{1e90c}', '\u{0}', '\u{0}']), + ('\u{1e92f}', ['\u{1e90d}', '\u{0}', '\u{0}']), + ('\u{1e930}', ['\u{1e90e}', '\u{0}', '\u{0}']), + ('\u{1e931}', ['\u{1e90f}', '\u{0}', '\u{0}']), + ('\u{1e932}', ['\u{1e910}', '\u{0}', '\u{0}']), + ('\u{1e933}', ['\u{1e911}', '\u{0}', 
'\u{0}']), + ('\u{1e934}', ['\u{1e912}', '\u{0}', '\u{0}']), + ('\u{1e935}', ['\u{1e913}', '\u{0}', '\u{0}']), + ('\u{1e936}', ['\u{1e914}', '\u{0}', '\u{0}']), + ('\u{1e937}', ['\u{1e915}', '\u{0}', '\u{0}']), + ('\u{1e938}', ['\u{1e916}', '\u{0}', '\u{0}']), + ('\u{1e939}', ['\u{1e917}', '\u{0}', '\u{0}']), + ('\u{1e93a}', ['\u{1e918}', '\u{0}', '\u{0}']), + ('\u{1e93b}', ['\u{1e919}', '\u{0}', '\u{0}']), + ('\u{1e93c}', ['\u{1e91a}', '\u{0}', '\u{0}']), + ('\u{1e93d}', ['\u{1e91b}', '\u{0}', '\u{0}']), + ('\u{1e93e}', ['\u{1e91c}', '\u{0}', '\u{0}']), + ('\u{1e93f}', ['\u{1e91d}', '\u{0}', '\u{0}']), + ('\u{1e940}', ['\u{1e91e}', '\u{0}', '\u{0}']), + ('\u{1e941}', ['\u{1e91f}', '\u{0}', '\u{0}']), + ('\u{1e942}', ['\u{1e920}', '\u{0}', '\u{0}']), + ('\u{1e943}', ['\u{1e921}', '\u{0}', '\u{0}']), +]; From 863241e0dada0ed1dd9bb08402602566d8407912 Mon Sep 17 00:00:00 2001 From: Jeremy Smart Date: Mon, 6 Oct 2025 15:54:15 -0400 Subject: [PATCH 248/358] add {Box, (Unique){Rc, Arc}}::(try_)map --- alloc/src/boxed.rs | 80 ++++++++++++++ alloc/src/lib.rs | 2 + alloc/src/rc.rs | 253 +++++++++++++++++++++++++++++++++++++++++++++ alloc/src/sync.rs | 253 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 588 insertions(+) diff --git a/alloc/src/boxed.rs b/alloc/src/boxed.rs index 49ff768bed1b2..bee8c6c1d4d56 100644 --- a/alloc/src/boxed.rs +++ b/alloc/src/boxed.rs @@ -192,11 +192,15 @@ use core::fmt; use core::future::Future; use core::hash::{Hash, Hasher}; use core::marker::{Tuple, Unsize}; +#[cfg(not(no_global_oom_handling))] +use core::mem::MaybeUninit; use core::mem::{self, SizedTypeProperties}; use core::ops::{ AsyncFn, AsyncFnMut, AsyncFnOnce, CoerceUnsized, Coroutine, CoroutineState, Deref, DerefMut, DerefPure, DispatchFromDyn, LegacyReceiver, }; +#[cfg(not(no_global_oom_handling))] +use core::ops::{Residual, Try}; use core::pin::{Pin, PinCoerceUnsized}; use core::ptr::{self, NonNull, Unique}; use core::task::{Context, Poll}; @@ -385,6 +389,82 @@ impl Box { pub fn try_new_zeroed() -> Result>, AllocError> { Box::try_new_zeroed_in(Global) } + + /// Maps the value in a box, reusing the allocation if possible. + /// + /// `f` is called on the value in the box, and the result is returned, also boxed. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::map(b, f)` instead of `b.map(f)`. This + /// is so that there is no conflict with a method on the inner type. + /// + /// # Examples + /// + /// ``` + /// #![feature(smart_pointer_try_map)] + /// + /// let b = Box::new(7); + /// let new = Box::map(b, |i| i + 7); + /// assert_eq!(*new, 14); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "smart_pointer_try_map", issue = "144419")] + pub fn map(this: Self, f: impl FnOnce(T) -> U) -> Box { + if size_of::() == size_of::() && align_of::() == align_of::() { + let (value, allocation) = Box::take(this); + Box::write( + unsafe { mem::transmute::>, Box>>(allocation) }, + f(value), + ) + } else { + Box::new(f(*this)) + } + } + + /// Attempts to map the value in a box, reusing the allocation if possible. + /// + /// `f` is called on the value in the box, and if the operation succeeds, the result is + /// returned, also boxed. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::try_map(b, f)` instead of `b.try_map(f)`. This + /// is so that there is no conflict with a method on the inner type. 
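+    ///
+    /// An illustrative sketch of the failure path (using `u8::try_from` as an
+    /// arbitrary fallible function): the error from `f` is propagated and no
+    /// new box is produced.
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    ///
+    /// let b = Box::new(-1);
+    /// assert!(Box::try_map(b, u8::try_from).is_err());
+    /// ```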
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    ///
+    /// let b = Box::new(7);
+    /// let new = Box::try_map(b, u32::try_from).unwrap();
+    /// assert_eq!(*new, 7);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "smart_pointer_try_map", issue = "144419")]
+    pub fn try_map<U, R>(
+        this: Self,
+        f: impl FnOnce(T) -> R,
+    ) -> <R::Residual as Residual<Box<U>>>::TryType
+    where
+        R: Try<Output = U>,
+        R::Residual: Residual<Box<U>>,
+    {
+        if size_of::<T>() == size_of::<U>() && align_of::<T>() == align_of::<U>() {
+            let (value, allocation) = Box::take(this);
+            try {
+                Box::write(
+                    unsafe {
+                        mem::transmute::<Box<MaybeUninit<T>>, Box<MaybeUninit<U>>>(
+                            allocation,
+                        )
+                    },
+                    f(value)?,
+                )
+            }
+        } else {
+            try { Box::new(f(*this)?) }
+        }
+    }
 }
 
 impl Box {
diff --git a/alloc/src/lib.rs b/alloc/src/lib.rs
index 87ad5b0ce30e6..ec3a421d69a6c 100644
--- a/alloc/src/lib.rs
+++ b/alloc/src/lib.rs
@@ -146,7 +146,9 @@
 #![feature(trusted_fused)]
 #![feature(trusted_len)]
 #![feature(trusted_random_access)]
+#![feature(try_blocks)]
 #![feature(try_trait_v2)]
+#![feature(try_trait_v2_residual)]
 #![feature(try_with_capacity)]
 #![feature(tuple_trait)]
 #![feature(ub_checks)]
diff --git a/alloc/src/rc.rs b/alloc/src/rc.rs
index 2b62b92d43886..a9cc2b262dd62 100644
--- a/alloc/src/rc.rs
+++ b/alloc/src/rc.rs
@@ -255,6 +255,8 @@ use core::marker::{PhantomData, Unsize};
 use core::mem::{self, ManuallyDrop, align_of_val_raw};
 use core::num::NonZeroUsize;
 use core::ops::{CoerceUnsized, Deref, DerefMut, DerefPure, DispatchFromDyn, LegacyReceiver};
+#[cfg(not(no_global_oom_handling))]
+use core::ops::{Residual, Try};
 use core::panic::{RefUnwindSafe, UnwindSafe};
 #[cfg(not(no_global_oom_handling))]
 use core::pin::Pin;
@@ -639,6 +641,93 @@ impl Rc {
     pub fn pin(value: T) -> Pin> {
         unsafe { Pin::new_unchecked(Rc::new(value)) }
     }
+
+    /// Maps the value in an `Rc`, reusing the allocation if possible.
+    ///
+    /// `f` is called on a reference to the value in the `Rc`, and the result is returned, also in
+    /// an `Rc`.
+    ///
+    /// Note: this is an associated function, which means that you have
+    /// to call it as `Rc::map(r, f)` instead of `r.map(f)`. This
+    /// is so that there is no conflict with a method on the inner type.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    ///
+    /// use std::rc::Rc;
+    ///
+    /// let r = Rc::new(7);
+    /// let new = Rc::map(r, |i| i + 7);
+    /// assert_eq!(*new, 14);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "smart_pointer_try_map", issue = "144419")]
+    pub fn map<U>(this: Self, f: impl FnOnce(&T) -> U) -> Rc<U> {
+        if size_of::<T>() == size_of::<U>()
+            && align_of::<T>() == align_of::<U>()
+            && Rc::is_unique(&this)
+        {
+            unsafe {
+                let ptr = Rc::into_raw(this);
+                let value = ptr.read();
+                let mut allocation = Rc::from_raw(ptr.cast::<mem::MaybeUninit<U>>());
+
+                Rc::get_mut_unchecked(&mut allocation).write(f(&value));
+                allocation.assume_init()
+            }
+        } else {
+            Rc::new(f(&*this))
+        }
+    }
+
+    /// Attempts to map the value in an `Rc`, reusing the allocation if possible.
+    ///
+    /// `f` is called on a reference to the value in the `Rc`, and if the operation succeeds, the
+    /// result is returned, also in an `Rc`.
+    ///
+    /// Note: this is an associated function, which means that you have
+    /// to call it as `Rc::try_map(r, f)` instead of `r.try_map(f)`. This
+    /// is so that there is no conflict with a method on the inner type.
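+    ///
+    /// A sketch of the failure path (with `u8::try_from` standing in for any
+    /// fallible conversion): the error is propagated and no new `Rc` is produced.
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    ///
+    /// use std::rc::Rc;
+    ///
+    /// let r = Rc::new(-1);
+    /// assert!(Rc::try_map(r, |&i| u8::try_from(i)).is_err());
+    /// ```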
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    ///
+    /// use std::rc::Rc;
+    ///
+    /// let b = Rc::new(7);
+    /// let new = Rc::try_map(b, |&i| u32::try_from(i)).unwrap();
+    /// assert_eq!(*new, 7);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "smart_pointer_try_map", issue = "144419")]
+    pub fn try_map<U, R>(
+        this: Self,
+        f: impl FnOnce(&T) -> R,
+    ) -> <R::Residual as Residual<Rc<U>>>::TryType
+    where
+        R: Try<Output = U>,
+        R::Residual: Residual<Rc<U>>,
+    {
+        if size_of::<T>() == size_of::<U>()
+            && align_of::<T>() == align_of::<U>()
+            && Rc::is_unique(&this)
+        {
+            unsafe {
+                let ptr = Rc::into_raw(this);
+                let value = ptr.read();
+                let mut allocation = Rc::from_raw(ptr.cast::<mem::MaybeUninit<U>>());
+
+                Rc::get_mut_unchecked(&mut allocation).write(f(&value)?);
+                try { allocation.assume_init() }
+            }
+        } else {
+            try { Rc::new(f(&*this)?) }
+        }
+    }
 }
 
 impl Rc {
@@ -3991,6 +4080,128 @@ impl UniqueRc {
     pub fn new(value: T) -> Self {
         Self::new_in(value, Global)
     }
+
+    /// Maps the value in a `UniqueRc`, reusing the allocation if possible.
+    ///
+    /// `f` is called on the value in the `UniqueRc` (taking it by value), and the result is
+    /// returned, also in a `UniqueRc`.
+    ///
+    /// Note: this is an associated function, which means that you have
+    /// to call it as `UniqueRc::map(u, f)` instead of `u.map(f)`. This
+    /// is so that there is no conflict with a method on the inner type.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    /// #![feature(unique_rc_arc)]
+    ///
+    /// use std::rc::UniqueRc;
+    ///
+    /// let r = UniqueRc::new(7);
+    /// let new = UniqueRc::map(r, |i| i + 7);
+    /// assert_eq!(*new, 14);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "smart_pointer_try_map", issue = "144419")]
+    pub fn map<U>(this: Self, f: impl FnOnce(T) -> U) -> UniqueRc<U> {
+        if size_of::<T>() == size_of::<U>()
+            && align_of::<T>() == align_of::<U>()
+            && UniqueRc::weak_count(&this) == 0
+        {
+            unsafe {
+                let ptr = UniqueRc::into_raw(this);
+                let value = ptr.read();
+                let mut allocation = UniqueRc::from_raw(ptr.cast::<mem::MaybeUninit<U>>());
+
+                allocation.write(f(value));
+                allocation.assume_init()
+            }
+        } else {
+            UniqueRc::new(f(UniqueRc::unwrap(this)))
+        }
+    }
+
+    /// Attempts to map the value in a `UniqueRc`, reusing the allocation if possible.
+    ///
+    /// `f` is called on the value in the `UniqueRc` (taking it by value), and if the operation
+    /// succeeds, the result is returned, also in a `UniqueRc`.
+    ///
+    /// Note: this is an associated function, which means that you have
+    /// to call it as `UniqueRc::try_map(u, f)` instead of `u.try_map(f)`. This
+    /// is so that there is no conflict with a method on the inner type.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    /// #![feature(unique_rc_arc)]
+    ///
+    /// use std::rc::UniqueRc;
+    ///
+    /// let b = UniqueRc::new(7);
+    /// let new = UniqueRc::try_map(b, u32::try_from).unwrap();
+    /// assert_eq!(*new, 7);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "smart_pointer_try_map", issue = "144419")]
+    pub fn try_map<U, R>(
+        this: Self,
+        f: impl FnOnce(T) -> R,
+    ) -> <R::Residual as Residual<UniqueRc<U>>>::TryType
+    where
+        R: Try<Output = U>,
+        R::Residual: Residual<UniqueRc<U>>,
+    {
+        if size_of::<T>() == size_of::<U>()
+            && align_of::<T>() == align_of::<U>()
+            && UniqueRc::weak_count(&this) == 0
+        {
+            unsafe {
+                let ptr = UniqueRc::into_raw(this);
+                let value = ptr.read();
+                let mut allocation = UniqueRc::from_raw(ptr.cast::<mem::MaybeUninit<U>>());
+
+                allocation.write(f(value)?);
+                try { allocation.assume_init() }
+            }
+        } else {
+            try { UniqueRc::new(f(UniqueRc::unwrap(this))?) }
+        }
+    }
+
+    #[cfg(not(no_global_oom_handling))]
+    fn unwrap(this: Self) -> T {
+        let this = ManuallyDrop::new(this);
+        let val: T = unsafe { ptr::read(&**this) };
+
+        let _weak = Weak { ptr: this.ptr, alloc: Global };
+
+        val
+    }
+}
+
+impl<T: ?Sized> UniqueRc<T> {
+    #[cfg(not(no_global_oom_handling))]
+    unsafe fn from_raw(ptr: *const T) -> Self {
+        let offset = unsafe { data_offset(ptr) };
+
+        // Reverse the offset to find the original RcInner.
+        let rc_ptr = unsafe { ptr.byte_sub(offset) as *mut RcInner<T> };
+
+        Self {
+            ptr: unsafe { NonNull::new_unchecked(rc_ptr) },
+            _marker: PhantomData,
+            _marker2: PhantomData,
+            alloc: Global,
+        }
+    }
+
+    #[cfg(not(no_global_oom_handling))]
+    fn into_raw(this: Self) -> *const T {
+        let this = ManuallyDrop::new(this);
+        Self::as_ptr(&*this)
+    }
+}
 
 impl UniqueRc {
@@ -4041,6 +4252,40 @@ impl UniqueRc {
         Rc::from_inner_in(this.ptr, alloc)
         }
     }
+
+    #[cfg(not(no_global_oom_handling))]
+    fn weak_count(this: &Self) -> usize {
+        this.inner().weak() - 1
+    }
+
+    #[cfg(not(no_global_oom_handling))]
+    fn inner(&self) -> &RcInner<T> {
+        // SAFETY: while this UniqueRc is alive we're guaranteed that the inner pointer is valid.
+        unsafe { self.ptr.as_ref() }
+    }
+
+    #[cfg(not(no_global_oom_handling))]
+    fn as_ptr(this: &Self) -> *const T {
+        let ptr: *mut RcInner<T> = NonNull::as_ptr(this.ptr);
+
+        // SAFETY: This cannot go through Deref::deref or UniqueRc::inner because
+        // this is required to retain raw/mut provenance such that e.g. `get_mut` can
+        // write through the pointer after the Rc is recovered through `from_raw`.
+        unsafe { &raw mut (*ptr).value }
+    }
+
+    #[inline]
+    #[cfg(not(no_global_oom_handling))]
+    fn into_inner_with_allocator(this: Self) -> (NonNull<RcInner<T>>, A) {
+        let this = mem::ManuallyDrop::new(this);
+        (this.ptr, unsafe { ptr::read(&this.alloc) })
+    }
+
+    #[inline]
+    #[cfg(not(no_global_oom_handling))]
+    unsafe fn from_inner_in(ptr: NonNull<RcInner<T>>, alloc: A) -> Self {
+        Self { ptr, _marker: PhantomData, _marker2: PhantomData, alloc }
+    }
 }
 
 impl UniqueRc {
@@ -4059,6 +4304,14 @@ impl UniqueRc {
     }
 }
+#[cfg(not(no_global_oom_handling))]
+impl<T, A: Allocator> UniqueRc<mem::MaybeUninit<T>, A> {
+    unsafe fn assume_init(self) -> UniqueRc<T, A> {
+        let (ptr, alloc) = UniqueRc::into_inner_with_allocator(self);
+        unsafe { UniqueRc::from_inner_in(ptr.cast(), alloc) }
+    }
+}
+
 #[unstable(feature = "unique_rc_arc", issue = "112566")]
 impl Deref for UniqueRc {
     type Target = T;
diff --git a/alloc/src/sync.rs b/alloc/src/sync.rs
index 5927d03646928..cf2815568af90 100644
--- a/alloc/src/sync.rs
+++ b/alloc/src/sync.rs
@@ -22,6 +22,8 @@ use core::marker::{PhantomData, Unsize};
 use core::mem::{self, ManuallyDrop, align_of_val_raw};
 use core::num::NonZeroUsize;
 use core::ops::{CoerceUnsized, Deref, DerefMut, DerefPure, DispatchFromDyn, LegacyReceiver};
+#[cfg(not(no_global_oom_handling))]
+use core::ops::{Residual, Try};
 use core::panic::{RefUnwindSafe, UnwindSafe};
 use core::pin::{Pin, PinCoerceUnsized};
 use core::ptr::{self, NonNull};
@@ -650,6 +652,93 @@ impl Arc {
             )?))
         }
     }
+
+    /// Maps the value in an `Arc`, reusing the allocation if possible.
+    ///
+    /// `f` is called on a reference to the value in the `Arc`, and the result is returned, also in
+    /// an `Arc`.
+    ///
+    /// Note: this is an associated function, which means that you have
+    /// to call it as `Arc::map(a, f)` instead of `a.map(f)`. This
+    /// is so that there is no conflict with a method on the inner type.
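+    ///
+    /// The allocation is only reused when the `Arc` is unique and the source and
+    /// target layouts match; otherwise the result goes into a fresh allocation.
+    /// An illustrative sketch with a shared `Arc`:
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    ///
+    /// use std::sync::Arc;
+    ///
+    /// let a = Arc::new(7);
+    /// let keep = a.clone(); // `a` is no longer unique
+    /// let new = Arc::map(a, |i| i + 1);
+    /// assert_eq!(*new, 8);
+    /// assert_eq!(*keep, 7);
+    /// ```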
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    ///
+    /// use std::sync::Arc;
+    ///
+    /// let r = Arc::new(7);
+    /// let new = Arc::map(r, |i| i + 7);
+    /// assert_eq!(*new, 14);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "smart_pointer_try_map", issue = "144419")]
+    pub fn map<U>(this: Self, f: impl FnOnce(&T) -> U) -> Arc<U> {
+        if size_of::<T>() == size_of::<U>()
+            && align_of::<T>() == align_of::<U>()
+            && Arc::is_unique(&this)
+        {
+            unsafe {
+                let ptr = Arc::into_raw(this);
+                let value = ptr.read();
+                let mut allocation = Arc::from_raw(ptr.cast::<mem::MaybeUninit<U>>());
+
+                Arc::get_mut_unchecked(&mut allocation).write(f(&value));
+                allocation.assume_init()
+            }
+        } else {
+            Arc::new(f(&*this))
+        }
+    }
+
+    /// Attempts to map the value in an `Arc`, reusing the allocation if possible.
+    ///
+    /// `f` is called on a reference to the value in the `Arc`, and if the operation succeeds, the
+    /// result is returned, also in an `Arc`.
+    ///
+    /// Note: this is an associated function, which means that you have
+    /// to call it as `Arc::try_map(a, f)` instead of `a.try_map(f)`. This
+    /// is so that there is no conflict with a method on the inner type.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    ///
+    /// use std::sync::Arc;
+    ///
+    /// let b = Arc::new(7);
+    /// let new = Arc::try_map(b, |&i| u32::try_from(i)).unwrap();
+    /// assert_eq!(*new, 7);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "smart_pointer_try_map", issue = "144419")]
+    pub fn try_map<U, R>(
+        this: Self,
+        f: impl FnOnce(&T) -> R,
+    ) -> <R::Residual as Residual<Arc<U>>>::TryType
+    where
+        R: Try<Output = U>,
+        R::Residual: Residual<Arc<U>>,
+    {
+        if size_of::<T>() == size_of::<U>()
+            && align_of::<T>() == align_of::<U>()
+            && Arc::is_unique(&this)
+        {
+            unsafe {
+                let ptr = Arc::into_raw(this);
+                let value = ptr.read();
+                let mut allocation = Arc::from_raw(ptr.cast::<mem::MaybeUninit<U>>());
+
+                Arc::get_mut_unchecked(&mut allocation).write(f(&value)?);
+                try { allocation.assume_init() }
+            }
+        } else {
+            try { Arc::new(f(&*this)?) }
+        }
+    }
 }
 
 impl Arc {
@@ -4404,6 +4493,128 @@ impl UniqueArc {
     pub fn new(value: T) -> Self {
         Self::new_in(value, Global)
     }
+
+    /// Maps the value in a `UniqueArc`, reusing the allocation if possible.
+    ///
+    /// `f` is called on the value in the `UniqueArc` (taking it by value), and the result is
+    /// returned, also in a `UniqueArc`.
+    ///
+    /// Note: this is an associated function, which means that you have
+    /// to call it as `UniqueArc::map(u, f)` instead of `u.map(f)`. This
+    /// is so that there is no conflict with a method on the inner type.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    /// #![feature(unique_rc_arc)]
+    ///
+    /// use std::sync::UniqueArc;
+    ///
+    /// let r = UniqueArc::new(7);
+    /// let new = UniqueArc::map(r, |i| i + 7);
+    /// assert_eq!(*new, 14);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "smart_pointer_try_map", issue = "144419")]
+    pub fn map<U>(this: Self, f: impl FnOnce(T) -> U) -> UniqueArc<U> {
+        if size_of::<T>() == size_of::<U>()
+            && align_of::<T>() == align_of::<U>()
+            && UniqueArc::weak_count(&this) == 0
+        {
+            unsafe {
+                let ptr = UniqueArc::into_raw(this);
+                let value = ptr.read();
+                let mut allocation = UniqueArc::from_raw(ptr.cast::<mem::MaybeUninit<U>>());
+
+                allocation.write(f(value));
+                allocation.assume_init()
+            }
+        } else {
+            UniqueArc::new(f(UniqueArc::unwrap(this)))
+        }
+    }
+
+    /// Attempts to map the value in a `UniqueArc`, reusing the allocation if possible.
+    ///
+    /// `f` is called on the value in the `UniqueArc` (taking it by value), and if the operation
+    /// succeeds, the result is returned, also in a `UniqueArc`.
+    ///
+    /// Note: this is an associated function, which means that you have
+    /// to call it as `UniqueArc::try_map(u, f)` instead of `u.try_map(f)`. This
+    /// is so that there is no conflict with a method on the inner type.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(smart_pointer_try_map)]
+    /// #![feature(unique_rc_arc)]
+    ///
+    /// use std::sync::UniqueArc;
+    ///
+    /// let b = UniqueArc::new(7);
+    /// let new = UniqueArc::try_map(b, u32::try_from).unwrap();
+    /// assert_eq!(*new, 7);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "smart_pointer_try_map", issue = "144419")]
+    pub fn try_map<U, R>(
+        this: Self,
+        f: impl FnOnce(T) -> R,
+    ) -> <R::Residual as Residual<UniqueArc<U>>>::TryType
+    where
+        R: Try<Output = U>,
+        R::Residual: Residual<UniqueArc<U>>,
+    {
+        if size_of::<T>() == size_of::<U>()
+            && align_of::<T>() == align_of::<U>()
+            && UniqueArc::weak_count(&this) == 0
+        {
+            unsafe {
+                let ptr = UniqueArc::into_raw(this);
+                let value = ptr.read();
+                let mut allocation = UniqueArc::from_raw(ptr.cast::<mem::MaybeUninit<U>>());
+
+                allocation.write(f(value)?);
+                try { allocation.assume_init() }
+            }
+        } else {
+            try { UniqueArc::new(f(UniqueArc::unwrap(this))?) }
+        }
+    }
+
+    #[cfg(not(no_global_oom_handling))]
+    fn unwrap(this: Self) -> T {
+        let this = ManuallyDrop::new(this);
+        let val: T = unsafe { ptr::read(&**this) };
+
+        let _weak = Weak { ptr: this.ptr, alloc: Global };
+
+        val
+    }
+}
+
+impl<T: ?Sized> UniqueArc<T> {
+    #[cfg(not(no_global_oom_handling))]
+    unsafe fn from_raw(ptr: *const T) -> Self {
+        let offset = unsafe { data_offset(ptr) };
+
+        // Reverse the offset to find the original ArcInner.
+        let rc_ptr = unsafe { ptr.byte_sub(offset) as *mut ArcInner<T> };
+
+        Self {
+            ptr: unsafe { NonNull::new_unchecked(rc_ptr) },
+            _marker: PhantomData,
+            _marker2: PhantomData,
+            alloc: Global,
+        }
+    }
+
+    #[cfg(not(no_global_oom_handling))]
+    fn into_raw(this: Self) -> *const T {
+        let this = ManuallyDrop::new(this);
+        Self::as_ptr(&*this)
+    }
+}
 
 impl UniqueArc {
@@ -4457,6 +4668,40 @@ impl UniqueArc {
         Arc::from_inner_in(this.ptr, alloc)
         }
     }
+
+    #[cfg(not(no_global_oom_handling))]
+    fn weak_count(this: &Self) -> usize {
+        this.inner().weak.load(Acquire) - 1
+    }
+
+    #[cfg(not(no_global_oom_handling))]
+    fn inner(&self) -> &ArcInner<T> {
+        // SAFETY: while this UniqueArc is alive we're guaranteed that the inner pointer is valid.
+        unsafe { self.ptr.as_ref() }
+    }
+
+    #[cfg(not(no_global_oom_handling))]
+    fn as_ptr(this: &Self) -> *const T {
+        let ptr: *mut ArcInner<T> = NonNull::as_ptr(this.ptr);
+
+        // SAFETY: This cannot go through Deref::deref or UniqueArc::inner because
+        // this is required to retain raw/mut provenance such that e.g. `get_mut` can
+        // write through the pointer after the Arc is recovered through `from_raw`.
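+        // (`&raw mut` below takes the field's address without creating an
+        // intermediate `&mut` reference, which would narrow that provenance.)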
+ unsafe { &raw mut (*ptr).data } + } + + #[inline] + #[cfg(not(no_global_oom_handling))] + fn into_inner_with_allocator(this: Self) -> (NonNull>, A) { + let this = mem::ManuallyDrop::new(this); + (this.ptr, unsafe { ptr::read(&this.alloc) }) + } + + #[inline] + #[cfg(not(no_global_oom_handling))] + unsafe fn from_inner_in(ptr: NonNull>, alloc: A) -> Self { + Self { ptr, _marker: PhantomData, _marker2: PhantomData, alloc } + } } impl UniqueArc { @@ -4487,6 +4732,14 @@ impl UniqueArc { } } +#[cfg(not(no_global_oom_handling))] +impl UniqueArc, A> { + unsafe fn assume_init(self) -> UniqueArc { + let (ptr, alloc) = UniqueArc::into_inner_with_allocator(self); + unsafe { UniqueArc::from_inner_in(ptr.cast(), alloc) } + } +} + #[unstable(feature = "unique_rc_arc", issue = "112566")] impl Deref for UniqueArc { type Target = T; From 5163600a876389e5968007cbc6619907391da0b0 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Fri, 31 Oct 2025 13:47:57 -0400 Subject: [PATCH 249/358] cleanup: upstream dropped amx-transpose functionality See also LLVM change 5322fb626820. Looks like this was just removed entirely. --- std_detect/src/detect/arch/x86.rs | 3 --- std_detect/src/detect/os/x86.rs | 1 - std_detect/tests/x86-specific.rs | 1 - 3 files changed, 5 deletions(-) diff --git a/std_detect/src/detect/arch/x86.rs b/std_detect/src/detect/arch/x86.rs index bd749b88f566d..e4318bedb2a6f 100644 --- a/std_detect/src/detect/arch/x86.rs +++ b/std_detect/src/detect/arch/x86.rs @@ -93,7 +93,6 @@ features! { /// * `"amx-fp8"` /// * `"amx-movrs"` /// * `"amx-tf32"` - /// * `"amx-transpose"` /// * `"f16c"` /// * `"fma"` /// * `"bmi1"` @@ -231,8 +230,6 @@ features! { /// AMX-MOVRS (Matrix MOVERS operations) @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tf32: "amx-tf32"; /// AMX-TF32 (TensorFloat32 Operations) - @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose"; - /// AMX-TRANSPOSE (Matrix Transpose Operations) @FEATURE: #[unstable(feature = "apx_target_feature", issue = "139284")] apxf: "apxf"; /// APX-F (Advanced Performance Extensions - Foundation) @FEATURE: #[unstable(feature = "avx10_target_feature", issue = "138843")] avx10_1: "avx10.1"; diff --git a/std_detect/src/detect/os/x86.rs b/std_detect/src/detect/os/x86.rs index cf11d8333127f..18925da2b2755 100644 --- a/std_detect/src/detect/os/x86.rs +++ b/std_detect/src/detect/os/x86.rs @@ -285,7 +285,6 @@ pub(crate) fn detect_features() -> cache::Initializer { unsafe { __cpuid_count(0x1e_u32, 1) }; enable(amx_feature_flags_eax, 4, Feature::amx_fp8); - enable(amx_feature_flags_eax, 5, Feature::amx_transpose); enable(amx_feature_flags_eax, 6, Feature::amx_tf32); enable(amx_feature_flags_eax, 7, Feature::amx_avx512); enable(amx_feature_flags_eax, 8, Feature::amx_movrs); diff --git a/std_detect/tests/x86-specific.rs b/std_detect/tests/x86-specific.rs index 2ed2bb2a99ecd..90ca32208e78d 100644 --- a/std_detect/tests/x86-specific.rs +++ b/std_detect/tests/x86-specific.rs @@ -76,7 +76,6 @@ fn dump() { println!("widekl: {:?}", is_x86_feature_detected!("widekl")); println!("movrs: {:?}", is_x86_feature_detected!("movrs")); println!("amx-fp8: {:?}", is_x86_feature_detected!("amx-fp8")); - println!("amx-transpose: {:?}", is_x86_feature_detected!("amx-transpose")); println!("amx-tf32: {:?}", is_x86_feature_detected!("amx-tf32")); println!("amx-avx512: {:?}", is_x86_feature_detected!("amx-avx512")); println!("amx-movrs: {:?}", is_x86_feature_detected!("amx-movrs")); From 
c2f7cdc7db8ff0f1a9d6bb419f6cc971c0498343 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Fri, 31 Oct 2025 14:49:45 -0400 Subject: [PATCH 250/358] revert combined nonpoison/poison tests for condvar Setup for writing different tests for the `nonpoison::Condvar` since it will have a different API. Signed-off-by: Connor Tsui --- std/tests/sync/condvar.rs | 472 ++++++++++++++++++-------------------- 1 file changed, 226 insertions(+), 246 deletions(-) diff --git a/std/tests/sync/condvar.rs b/std/tests/sync/condvar.rs index 42b880e283afe..74328793caeae 100644 --- a/std/tests/sync/condvar.rs +++ b/std/tests/sync/condvar.rs @@ -17,256 +17,237 @@ nonpoison_and_poison_unwrap_test!( } ); +#[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -nonpoison_and_poison_unwrap_test!( - name: notify_one, - test_body: { - use locks::{Condvar, Mutex}; +fn notify_one() { + use std::sync::{Condvar, Mutex}; - let m = Arc::new(Mutex::new(())); - let m2 = m.clone(); - let c = Arc::new(Condvar::new()); - let c2 = c.clone(); + let m = Arc::new(Mutex::new(())); + let m2 = m.clone(); + let c = Arc::new(Condvar::new()); + let c2 = c.clone(); - let g = maybe_unwrap(m.lock()); - let _t = thread::spawn(move || { - let _g = maybe_unwrap(m2.lock()); - c2.notify_one(); - }); - let g = maybe_unwrap(c.wait(g)); - drop(g); - } -); + let g = m.lock().unwrap(); + let _t = thread::spawn(move || { + let _g = m2.lock().unwrap(); + c2.notify_one(); + }); -#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -nonpoison_and_poison_unwrap_test!( - name: notify_all, - test_body: { - use locks::{Condvar, Mutex}; - - const N: usize = 10; - - let data = Arc::new((Mutex::new(0), Condvar::new())); - let (tx, rx) = channel(); - for _ in 0..N { - let data = data.clone(); - let tx = tx.clone(); - thread::spawn(move || { - let &(ref lock, ref cond) = &*data; - let mut cnt = maybe_unwrap(lock.lock()); - *cnt += 1; - if *cnt == N { - tx.send(()).unwrap(); - } - while *cnt != 0 { - cnt = maybe_unwrap(cond.wait(cnt)); - } - tx.send(()).unwrap(); - }); - } - drop(tx); - - let &(ref lock, ref cond) = &*data; - rx.recv().unwrap(); - let mut cnt = maybe_unwrap(lock.lock()); - *cnt = 0; - cond.notify_all(); - drop(cnt); - - for _ in 0..N { - rx.recv().unwrap(); - } - } -); + let g = c.wait(g).unwrap(); + drop(g); +} +#[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -nonpoison_and_poison_unwrap_test!( - name: test_mutex_arc_condvar, - test_body: { - use locks::{Condvar, Mutex}; - - struct Packet(Arc<(Mutex, Condvar)>); +fn notify_all() { + use std::sync::{Condvar, Mutex}; - let packet = Packet(Arc::new((Mutex::new(false), Condvar::new()))); - let packet2 = Packet(packet.0.clone()); + const N: usize = 10; - let (tx, rx) = channel(); - - let _t = thread::spawn(move || { - // Wait until our parent has taken the lock. - rx.recv().unwrap(); - let &(ref lock, ref cvar) = &*packet2.0; - - // Set the data to `true` and wake up our parent. 
- let mut guard = maybe_unwrap(lock.lock()); - *guard = true; - cvar.notify_one(); + let data = Arc::new((Mutex::new(0), Condvar::new())); + let (tx, rx) = channel(); + for _ in 0..N { + let data = data.clone(); + let tx = tx.clone(); + thread::spawn(move || { + let &(ref lock, ref cond) = &*data; + let mut cnt = lock.lock().unwrap(); + *cnt += 1; + if *cnt == N { + tx.send(()).unwrap(); + } + while *cnt != 0 { + cnt = cond.wait(cnt).unwrap(); + } + tx.send(()).unwrap(); }); + } + drop(tx); - let &(ref lock, ref cvar) = &*packet.0; - let mut guard = maybe_unwrap(lock.lock()); - // Wake up our child. - tx.send(()).unwrap(); + let &(ref lock, ref cond) = &*data; + rx.recv().unwrap(); + let mut cnt = lock.lock().unwrap(); + *cnt = 0; + cond.notify_all(); + drop(cnt); - // Wait until our child has set the data to `true`. - assert!(!*guard); - while !*guard { - guard = maybe_unwrap(cvar.wait(guard)); - } + for _ in 0..N { + rx.recv().unwrap(); } -); +} +#[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -nonpoison_and_poison_unwrap_test!( - name: wait_while, - test_body: { - use locks::{Condvar, Mutex}; +fn test_mutex_arc_condvar() { + use std::sync::{Condvar, Mutex}; - let pair = Arc::new((Mutex::new(false), Condvar::new())); - let pair2 = pair.clone(); + struct Packet(Arc<(Mutex, Condvar)>); - // Inside of our lock, spawn a new thread, and then wait for it to start. - thread::spawn(move || { - let &(ref lock, ref cvar) = &*pair2; - let mut started = maybe_unwrap(lock.lock()); - *started = true; - // We notify the condvar that the value has changed. - cvar.notify_one(); - }); + let packet = Packet(Arc::new((Mutex::new(false), Condvar::new()))); + let packet2 = Packet(packet.0.clone()); - // Wait for the thread to start up. - let &(ref lock, ref cvar) = &*pair; - let guard = cvar.wait_while(maybe_unwrap(lock.lock()), |started| !*started); - assert!(*maybe_unwrap(guard)); + let (tx, rx) = channel(); + + let _t = thread::spawn(move || { + // Wait until our parent has taken the lock. + rx.recv().unwrap(); + let &(ref lock, ref cvar) = &*packet2.0; + + // Set the data to `true` and wake up our parent. + let mut guard = lock.lock().unwrap(); + *guard = true; + cvar.notify_one(); + }); + + let &(ref lock, ref cvar) = &*packet.0; + let mut guard = lock.lock().unwrap(); + // Wake up our child. + tx.send(()).unwrap(); + + // Wait until our child has set the data to `true`. + assert!(!*guard); + while !*guard { + guard = cvar.wait(guard).unwrap(); } -); +} +#[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -nonpoison_and_poison_unwrap_test!( - name: wait_timeout_wait, - test_body: { - use locks::{Condvar, Mutex}; - - let m = Arc::new(Mutex::new(())); - let c = Arc::new(Condvar::new()); - - loop { - let g = maybe_unwrap(m.lock()); - let (_g, no_timeout) = maybe_unwrap(c.wait_timeout(g, Duration::from_millis(1))); - // spurious wakeups mean this isn't necessarily true - // so execute test again, if not timeout - if !no_timeout.timed_out() { - continue; - } - - break; +fn wait_while() { + use std::sync::{Condvar, Mutex}; + + let pair = Arc::new((Mutex::new(false), Condvar::new())); + let pair2 = pair.clone(); + + // Inside of our lock, spawn a new thread, and then wait for it to start. + thread::spawn(move || { + let &(ref lock, ref cvar) = &*pair2; + let mut started = lock.lock().unwrap(); + *started = true; + // We notify the condvar that the value has changed. + cvar.notify_one(); + }); + + // Wait for the thread to start up. 
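+    // `wait_while` re-checks the predicate on every wakeup, so a spurious
+    // wakeup cannot let the test proceed before `started` is true.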
+ let &(ref lock, ref cvar) = &*pair; + let guard = cvar.wait_while(lock.lock().unwrap(), |started| !*started).unwrap(); + assert!(*guard); +} + +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. +fn wait_timeout_wait() { + use std::sync::{Condvar, Mutex}; + + let m = Arc::new(Mutex::new(())); + let c = Arc::new(Condvar::new()); + + loop { + let g = m.lock().unwrap(); + let (_g, no_timeout) = c.wait_timeout(g, Duration::from_millis(1)).unwrap(); + // spurious wakeups mean this isn't necessarily true + // so execute test again, if not timeout + if !no_timeout.timed_out() { + continue; } + + break; } -); +} +#[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -nonpoison_and_poison_unwrap_test!( - name: wait_timeout_while_wait, - test_body: { - use locks::{Condvar, Mutex}; +fn wait_timeout_while_wait() { + use std::sync::{Condvar, Mutex}; - let m = Arc::new(Mutex::new(())); - let c = Arc::new(Condvar::new()); + let m = Arc::new(Mutex::new(())); + let c = Arc::new(Condvar::new()); - let g = maybe_unwrap(m.lock()); - let (_g, wait) = maybe_unwrap(c.wait_timeout_while(g, Duration::from_millis(1), |_| true)); - // no spurious wakeups. ensure it timed-out - assert!(wait.timed_out()); - } -); + let g = m.lock().unwrap(); + let (_g, wait) = c.wait_timeout_while(g, Duration::from_millis(1), |_| true).unwrap(); + // no spurious wakeups. ensure it timed-out + assert!(wait.timed_out()); +} +#[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -nonpoison_and_poison_unwrap_test!( - name: wait_timeout_while_instant_satisfy, - test_body: { - use locks::{Condvar, Mutex}; +fn wait_timeout_while_instant_satisfy() { + use std::sync::{Condvar, Mutex}; - let m = Arc::new(Mutex::new(())); - let c = Arc::new(Condvar::new()); + let m = Arc::new(Mutex::new(())); + let c = Arc::new(Condvar::new()); - let g = maybe_unwrap(m.lock()); - let (_g, wait) = - maybe_unwrap(c.wait_timeout_while(g, Duration::from_millis(0), |_| false)); - // ensure it didn't time-out even if we were not given any time. - assert!(!wait.timed_out()); - } -); + let g = m.lock().unwrap(); + let (_g, wait) = c.wait_timeout_while(g, Duration::from_millis(0), |_| false).unwrap(); + // ensure it didn't time-out even if we were not given any time. + assert!(!wait.timed_out()); +} +#[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -nonpoison_and_poison_unwrap_test!( - name: wait_timeout_while_wake, - test_body: { - use locks::{Condvar, Mutex}; +fn wait_timeout_while_wake() { + use std::sync::{Condvar, Mutex}; + + let pair = Arc::new((Mutex::new(false), Condvar::new())); + let pair_copy = pair.clone(); + + let &(ref m, ref c) = &*pair; + let g = m.lock().unwrap(); + let _t = thread::spawn(move || { + let &(ref lock, ref cvar) = &*pair_copy; + let mut started = lock.lock().unwrap(); + thread::sleep(Duration::from_millis(1)); + *started = true; + cvar.notify_one(); + }); + + let (g2, wait) = c + .wait_timeout_while(g, Duration::from_millis(u64::MAX), |&mut notified| !notified) + .unwrap(); + // ensure it didn't time-out even if we were not given any time. + assert!(!wait.timed_out()); + assert!(*g2); +} + +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. 
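+// A spurious wakeup is not a failure in this test: the loop below simply
+// retries until a wakeup coincides with the actual notification.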
+fn wait_timeout_wake() { + use std::sync::{Condvar, Mutex}; - let pair = Arc::new((Mutex::new(false), Condvar::new())); - let pair_copy = pair.clone(); + let m = Arc::new(Mutex::new(())); + let c = Arc::new(Condvar::new()); - let &(ref m, ref c) = &*pair; - let g = maybe_unwrap(m.lock()); - let _t = thread::spawn(move || { - let &(ref lock, ref cvar) = &*pair_copy; - let mut started = maybe_unwrap(lock.lock()); - thread::sleep(Duration::from_millis(1)); - *started = true; - cvar.notify_one(); - }); - let (g2, wait) = maybe_unwrap(c.wait_timeout_while( - g, - Duration::from_millis(u64::MAX), - |&mut notified| !notified - )); - // ensure it didn't time-out even if we were not given any time. - assert!(!wait.timed_out()); - assert!(*g2); - } -); + loop { + let g = m.lock().unwrap(); -#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -nonpoison_and_poison_unwrap_test!( - name: wait_timeout_wake, - test_body: { - use locks::{Condvar, Mutex}; + let c2 = c.clone(); + let m2 = m.clone(); - let m = Arc::new(Mutex::new(())); - let c = Arc::new(Condvar::new()); + let notified = Arc::new(AtomicBool::new(false)); + let notified_copy = notified.clone(); - loop { - let g = maybe_unwrap(m.lock()); - - let c2 = c.clone(); - let m2 = m.clone(); - - let notified = Arc::new(AtomicBool::new(false)); - let notified_copy = notified.clone(); - - let t = thread::spawn(move || { - let _g = maybe_unwrap(m2.lock()); - thread::sleep(Duration::from_millis(1)); - notified_copy.store(true, Ordering::Relaxed); - c2.notify_one(); - }); - let (g, timeout_res) = - maybe_unwrap(c.wait_timeout(g, Duration::from_millis(u64::MAX))); - assert!(!timeout_res.timed_out()); - // spurious wakeups mean this isn't necessarily true - // so execute test again, if not notified - if !notified.load(Ordering::Relaxed) { - t.join().unwrap(); - continue; - } - drop(g); + let t = thread::spawn(move || { + let _g = m2.lock().unwrap(); + thread::sleep(Duration::from_millis(1)); + notified_copy.store(true, Ordering::Relaxed); + c2.notify_one(); + }); + let (g, timeout_res) = c.wait_timeout(g, Duration::from_millis(u64::MAX)).unwrap(); + assert!(!timeout_res.timed_out()); + // spurious wakeups mean this isn't necessarily true + // so execute test again, if not notified + if !notified.load(Ordering::Relaxed) { t.join().unwrap(); - - break; + continue; } + drop(g); + + t.join().unwrap(); + + break; } -); +} // Some platforms internally cast the timeout duration into nanoseconds. // If they fail to consider overflow during the conversion (I'm looking @@ -274,42 +255,41 @@ nonpoison_and_poison_unwrap_test!( // timeout for durations that are slightly longer than u64::MAX nanoseconds. // `std` should guard against this by clamping the timeout. // See #37440 for context. -nonpoison_and_poison_unwrap_test!( - name: timeout_nanoseconds, - test_body: { - use locks::Mutex; - use locks::Condvar; +#[test] +fn timeout_nanoseconds() { + use std::sync::{Condvar, Mutex}; + + let sent = Mutex::new(false); + let cond = Condvar::new(); + + thread::scope(|s| { + s.spawn(|| { + // Sleep so that the other thread has a chance to encounter the + // timeout. + thread::sleep(Duration::from_secs(2)); + *sent.lock().unwrap() = true; + cond.notify_all(); + }); - let sent = Mutex::new(false); - let cond = Condvar::new(); - - thread::scope(|s| { - s.spawn(|| { - // Sleep so that the other thread has a chance to encounter the - // timeout. 
-                thread::sleep(Duration::from_secs(2));
-                maybe_unwrap(sent.set(true));
-                cond.notify_all();
-            });
-
-            let mut guard = maybe_unwrap(sent.lock());
-            // Loop until `sent` is set by the thread to guard against spurious
-            // wakeups. If the `wait_timeout` happens just before the signal by
-            // the other thread, such a spurious wakeup might prevent the
-            // miscalculated timeout from occurring, but this is basically just
-            // a smoke test anyway.
-            loop {
-                if *guard {
-                    break;
-                }
-
-                // If there is internal overflow, this call will return almost
-                // immediately, before the other thread has reached the `notify_all`,
-                // and indicate a timeout.
-                let (g, res) = maybe_unwrap(cond.wait_timeout(guard, Duration::from_secs(u64::MAX.div_ceil(1_000_000_000))));
-                assert!(!res.timed_out());
-                guard = g;
+        let mut guard = sent.lock().unwrap();
+        // Loop until `sent` is set by the thread to guard against spurious
+        // wakeups. If the `wait_timeout` happens just before the signal by
+        // the other thread, such a spurious wakeup might prevent the
+        // miscalculated timeout from occurring, but this is basically just
+        // a smoke test anyway.
+        loop {
+            if *guard {
+                break;
             }
-        })
-    }
-);
+
+            // If there is internal overflow, this call will return almost
+            // immediately, before the other thread has reached the `notify_all`,
+            // and indicate a timeout.
+            let (g, res) = cond
+                .wait_timeout(guard, Duration::from_secs(u64::MAX.div_ceil(1_000_000_000)))
+                .unwrap();
+            assert!(!res.timed_out());
+            guard = g;
+        }
+    })
+}
From 53761b567b08e6ffc75cb7b7b25ee5230fad303e Mon Sep 17 00:00:00 2001
From: Connor Tsui
Date: Fri, 31 Oct 2025 15:12:46 -0400
Subject: [PATCH 251/358] update `nonpoison::Condvar` to take guards by reference

Since the non-poisoning `Condvar` takes non-poisoning `Mutex`es when
`wait`ing, we do not need to take the guard by ownership, since a
poison error cannot occur while we wait.

Signed-off-by: Connor Tsui
---
 std/src/sync/barrier.rs           |  2 +-
 std/src/sync/nonpoison/condvar.rs |  71 ++++---
 std/tests/sync/condvar.rs         | 309 ++++++++++++++++++++++++++++--
 3 files changed, 324 insertions(+), 58 deletions(-)

diff --git a/std/src/sync/barrier.rs b/std/src/sync/barrier.rs
index 8988126bd90c0..c2c18889dde7d 100644
--- a/std/src/sync/barrier.rs
+++ b/std/src/sync/barrier.rs
@@ -125,7 +125,7 @@ impl Barrier {
         let local_gen = lock.generation_id;
         lock.count += 1;
         if lock.count < self.num_threads {
-            let _guard = self.cvar.wait_while(lock, |state| local_gen == state.generation_id);
+            self.cvar.wait_while(&mut lock, |state| local_gen == state.generation_id);
             BarrierWaitResult(false)
         } else {
             lock.count = 0;
diff --git a/std/src/sync/nonpoison/condvar.rs b/std/src/sync/nonpoison/condvar.rs
index 994fc6816a0d0..d2b251d7c44c1 100644
--- a/std/src/sync/nonpoison/condvar.rs
+++ b/std/src/sync/nonpoison/condvar.rs
@@ -1,4 +1,5 @@
 use crate::fmt;
+use crate::ops::DerefMut;
 use crate::sync::WaitTimeoutResult;
 use crate::sync::nonpoison::{MutexGuard, mutex};
 use crate::sys::sync as sys;
@@ -38,7 +39,7 @@ use crate::time::{Duration, Instant};
 /// let (lock, cvar) = &*pair;
 /// let mut started = lock.lock();
 /// while !*started {
-///     started = cvar.wait(started);
+///     cvar.wait(&mut started);
 /// }
 /// ```
 ///
@@ -115,16 +116,15 @@ impl Condvar {
     /// let mut started = lock.lock();
     /// // As long as the value inside the `Mutex` is `false`, we wait.
/// while !*started { - /// started = cvar.wait(started); + /// cvar.wait(&mut started); /// } /// ``` #[unstable(feature = "nonpoison_condvar", issue = "134645")] - pub fn wait<'a, T>(&self, guard: MutexGuard<'a, T>) -> MutexGuard<'a, T> { + pub fn wait(&self, guard: &mut MutexGuard<'_, T>) { unsafe { - let lock = mutex::guard_lock(&guard); + let lock = mutex::guard_lock(guard); self.inner.wait(lock); } - guard } /// Blocks the current thread until the provided condition becomes false. @@ -167,21 +167,17 @@ impl Condvar { /// // Wait for the thread to start up. /// let (lock, cvar) = &*pair; /// // As long as the value inside the `Mutex` is `true`, we wait. - /// let _guard = cvar.wait_while(lock.lock(), |pending| { *pending }); + /// let mut guard = lock.lock(); + /// cvar.wait_while(&mut guard, |pending| { *pending }); /// ``` #[unstable(feature = "nonpoison_condvar", issue = "134645")] - pub fn wait_while<'a, T, F>( - &self, - mut guard: MutexGuard<'a, T>, - mut condition: F, - ) -> MutexGuard<'a, T> + pub fn wait_while(&self, guard: &mut MutexGuard<'_, T>, mut condition: F) where F: FnMut(&mut T) -> bool, { - while condition(&mut *guard) { - guard = self.wait(guard); + while condition(guard.deref_mut()) { + self.wait(guard); } - guard } /// Waits on this condition variable for a notification, timing out after a @@ -206,7 +202,7 @@ impl Condvar { /// The returned [`WaitTimeoutResult`] value indicates if the timeout is /// known to have elapsed. /// - /// Like [`wait`], the lock specified will be re-acquired when this function + /// Like [`wait`], the lock specified will have been re-acquired when this function /// returns, regardless of whether the timeout elapsed or not. /// /// [`wait`]: Self::wait @@ -239,9 +235,8 @@ impl Condvar { /// let mut started = lock.lock(); /// // as long as the value inside the `Mutex` is `false`, we wait /// loop { - /// let result = cvar.wait_timeout(started, Duration::from_millis(10)); + /// let result = cvar.wait_timeout(&mut started, Duration::from_millis(10)); /// // 10 milliseconds have passed, or maybe the value changed! - /// started = result.0; /// if *started == true { /// // We received the notification and the value has been updated, we can leave. /// break @@ -249,16 +244,16 @@ impl Condvar { /// } /// ``` #[unstable(feature = "nonpoison_condvar", issue = "134645")] - pub fn wait_timeout<'a, T>( + pub fn wait_timeout( &self, - guard: MutexGuard<'a, T>, + guard: &mut MutexGuard<'_, T>, dur: Duration, - ) -> (MutexGuard<'a, T>, WaitTimeoutResult) { + ) -> WaitTimeoutResult { let success = unsafe { - let lock = mutex::guard_lock(&guard); + let lock = mutex::guard_lock(guard); self.inner.wait_timeout(lock, dur) }; - (guard, WaitTimeoutResult(!success)) + WaitTimeoutResult(!success) } /// Waits on this condition variable for a notification, timing out after a @@ -277,7 +272,7 @@ impl Condvar { /// The returned [`WaitTimeoutResult`] value indicates if the timeout is /// known to have elapsed without the condition being met. /// - /// Like [`wait_while`], the lock specified will be re-acquired when this + /// Like [`wait_while`], the lock specified will have been re-acquired when this /// function returns, regardless of whether the timeout elapsed or not. 
/// /// [`wait_while`]: Self::wait_while @@ -307,37 +302,39 @@ impl Condvar { /// /// // wait for the thread to start up /// let (lock, cvar) = &*pair; + /// let mut guard = lock.lock(); /// let result = cvar.wait_timeout_while( - /// lock.lock(), + /// &mut guard, /// Duration::from_millis(100), /// |&mut pending| pending, /// ); - /// if result.1.timed_out() { + /// if result.timed_out() { /// // timed-out without the condition ever evaluating to false. /// } - /// // access the locked mutex via result.0 + /// // access the locked mutex via guard /// ``` #[unstable(feature = "nonpoison_condvar", issue = "134645")] - pub fn wait_timeout_while<'a, T, F>( + pub fn wait_timeout_while( &self, - mut guard: MutexGuard<'a, T>, + guard: &mut MutexGuard<'_, T>, dur: Duration, mut condition: F, - ) -> (MutexGuard<'a, T>, WaitTimeoutResult) + ) -> WaitTimeoutResult where F: FnMut(&mut T) -> bool, { let start = Instant::now(); - loop { - if !condition(&mut *guard) { - return (guard, WaitTimeoutResult(false)); - } + + while condition(guard.deref_mut()) { let timeout = match dur.checked_sub(start.elapsed()) { Some(timeout) => timeout, - None => return (guard, WaitTimeoutResult(true)), + None => return WaitTimeoutResult(true), }; - guard = self.wait_timeout(guard, timeout).0; + + self.wait_timeout(guard, timeout); } + + WaitTimeoutResult(false) } /// Wakes up one blocked thread on this condvar. @@ -378,7 +375,7 @@ impl Condvar { /// let mut started = lock.lock(); /// // As long as the value inside the `Mutex` is `false`, we wait. /// while !*started { - /// started = cvar.wait(started); + /// cvar.wait(&mut started); /// } /// ``` #[unstable(feature = "nonpoison_condvar", issue = "134645")] @@ -422,7 +419,7 @@ impl Condvar { /// let mut started = lock.lock(); /// // As long as the value inside the `Mutex` is `false`, we wait. /// while !*started { - /// started = cvar.wait(started); + /// cvar.wait(&mut started); /// } /// ``` #[unstable(feature = "nonpoison_condvar", issue = "134645")] diff --git a/std/tests/sync/condvar.rs b/std/tests/sync/condvar.rs index 74328793caeae..a52e0a00caf48 100644 --- a/std/tests/sync/condvar.rs +++ b/std/tests/sync/condvar.rs @@ -19,8 +19,8 @@ nonpoison_and_poison_unwrap_test!( #[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -fn notify_one() { - use std::sync::{Condvar, Mutex}; +fn poison_notify_one() { + use std::sync::poison::{Condvar, Mutex}; let m = Arc::new(Mutex::new(())); let m2 = m.clone(); @@ -39,8 +39,28 @@ fn notify_one() { #[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -fn notify_all() { - use std::sync::{Condvar, Mutex}; +fn nonpoison_notify_one() { + use std::sync::nonpoison::{Condvar, Mutex}; + + let m = Arc::new(Mutex::new(())); + let m2 = m.clone(); + let c = Arc::new(Condvar::new()); + let c2 = c.clone(); + + let mut g = m.lock(); + let _t = thread::spawn(move || { + let _g = m2.lock(); + c2.notify_one(); + }); + + c.wait(&mut g); + drop(g); +} + +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. +fn poison_notify_all() { + use std::sync::poison::{Condvar, Mutex}; const N: usize = 10; @@ -78,8 +98,47 @@ fn notify_all() { #[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. 
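+// Mirrors `poison_notify_all`: the nonpoison `lock` returns the guard
+// directly, and `wait` takes it by `&mut` instead of by value.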
-fn test_mutex_arc_condvar() { - use std::sync::{Condvar, Mutex}; +fn nonpoison_notify_all() { + use std::sync::nonpoison::{Condvar, Mutex}; + + const N: usize = 10; + + let data = Arc::new((Mutex::new(0), Condvar::new())); + let (tx, rx) = channel(); + for _ in 0..N { + let data = data.clone(); + let tx = tx.clone(); + thread::spawn(move || { + let &(ref lock, ref cond) = &*data; + let mut cnt = lock.lock(); + *cnt += 1; + if *cnt == N { + tx.send(()).unwrap(); + } + while *cnt != 0 { + cond.wait(&mut cnt); + } + tx.send(()).unwrap(); + }); + } + drop(tx); + + let &(ref lock, ref cond) = &*data; + rx.recv().unwrap(); + let mut cnt = lock.lock(); + *cnt = 0; + cond.notify_all(); + drop(cnt); + + for _ in 0..N { + rx.recv().unwrap(); + } +} + +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. +fn poison_test_mutex_arc_condvar() { + use std::sync::poison::{Condvar, Mutex}; struct Packet(Arc<(Mutex, Condvar)>); @@ -113,8 +172,43 @@ fn test_mutex_arc_condvar() { #[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -fn wait_while() { - use std::sync::{Condvar, Mutex}; +fn nonpoison_test_mutex_arc_condvar() { + use std::sync::nonpoison::{Condvar, Mutex}; + + struct Packet(Arc<(Mutex, Condvar)>); + + let packet = Packet(Arc::new((Mutex::new(false), Condvar::new()))); + let packet2 = Packet(packet.0.clone()); + + let (tx, rx) = channel(); + + let _t = thread::spawn(move || { + // Wait until our parent has taken the lock. + rx.recv().unwrap(); + let &(ref lock, ref cvar) = &*packet2.0; + + // Set the data to `true` and wake up our parent. + let mut guard = lock.lock(); + *guard = true; + cvar.notify_one(); + }); + + let &(ref lock, ref cvar) = &*packet.0; + let mut guard = lock.lock(); + // Wake up our child. + tx.send(()).unwrap(); + + // Wait until our child has set the data to `true`. + assert!(!*guard); + while !*guard { + cvar.wait(&mut guard); + } +} + +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. +fn poison_wait_while() { + use std::sync::poison::{Condvar, Mutex}; let pair = Arc::new((Mutex::new(false), Condvar::new())); let pair2 = pair.clone(); @@ -136,8 +230,32 @@ fn wait_while() { #[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -fn wait_timeout_wait() { - use std::sync::{Condvar, Mutex}; +fn nonpoison_wait_while() { + use std::sync::nonpoison::{Condvar, Mutex}; + + let pair = Arc::new((Mutex::new(false), Condvar::new())); + let pair2 = pair.clone(); + + // Inside of our lock, spawn a new thread, and then wait for it to start. + thread::spawn(move || { + let &(ref lock, ref cvar) = &*pair2; + let mut started = lock.lock(); + *started = true; + // We notify the condvar that the value has changed. + cvar.notify_one(); + }); + + // Wait for the thread to start up. + let &(ref lock, ref cvar) = &*pair; + let mut guard = lock.lock(); + cvar.wait_while(&mut guard, |started| !*started); + assert!(*guard); +} + +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. +fn poison_wait_timeout_wait() { + use std::sync::poison::{Condvar, Mutex}; let m = Arc::new(Mutex::new(())); let c = Arc::new(Condvar::new()); @@ -157,8 +275,29 @@ fn wait_timeout_wait() { #[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. 
-fn wait_timeout_while_wait() { - use std::sync::{Condvar, Mutex}; +fn nonpoison_wait_timeout_wait() { + use std::sync::nonpoison::{Condvar, Mutex}; + + let m = Arc::new(Mutex::new(())); + let c = Arc::new(Condvar::new()); + + loop { + let mut g = m.lock(); + let no_timeout = c.wait_timeout(&mut g, Duration::from_millis(1)); + // spurious wakeups mean this isn't necessarily true + // so execute test again, if not timeout + if !no_timeout.timed_out() { + continue; + } + + break; + } +} + +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. +fn poison_wait_timeout_while_wait() { + use std::sync::poison::{Condvar, Mutex}; let m = Arc::new(Mutex::new(())); let c = Arc::new(Condvar::new()); @@ -171,8 +310,22 @@ fn wait_timeout_while_wait() { #[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -fn wait_timeout_while_instant_satisfy() { - use std::sync::{Condvar, Mutex}; +fn nonpoison_wait_timeout_while_wait() { + use std::sync::nonpoison::{Condvar, Mutex}; + + let m = Arc::new(Mutex::new(())); + let c = Arc::new(Condvar::new()); + + let mut g = m.lock(); + let wait = c.wait_timeout_while(&mut g, Duration::from_millis(1), |_| true); + // no spurious wakeups. ensure it timed-out + assert!(wait.timed_out()); +} + +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. +fn poison_wait_timeout_while_instant_satisfy() { + use std::sync::poison::{Condvar, Mutex}; let m = Arc::new(Mutex::new(())); let c = Arc::new(Condvar::new()); @@ -185,8 +338,22 @@ fn wait_timeout_while_instant_satisfy() { #[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -fn wait_timeout_while_wake() { - use std::sync::{Condvar, Mutex}; +fn nonpoison_wait_timeout_while_instant_satisfy() { + use std::sync::nonpoison::{Condvar, Mutex}; + + let m = Arc::new(Mutex::new(())); + let c = Arc::new(Condvar::new()); + + let mut g = m.lock(); + let wait = c.wait_timeout_while(&mut g, Duration::from_millis(0), |_| false); + // ensure it didn't time-out even if we were not given any time. + assert!(!wait.timed_out()); +} + +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. +fn poison_wait_timeout_while_wake() { + use std::sync::poison::{Condvar, Mutex}; let pair = Arc::new((Mutex::new(false), Condvar::new())); let pair_copy = pair.clone(); @@ -211,8 +378,33 @@ fn wait_timeout_while_wake() { #[test] #[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. -fn wait_timeout_wake() { - use std::sync::{Condvar, Mutex}; +fn nonpoison_wait_timeout_while_wake() { + use std::sync::nonpoison::{Condvar, Mutex}; + + let pair = Arc::new((Mutex::new(false), Condvar::new())); + let pair_copy = pair.clone(); + + let &(ref m, ref c) = &*pair; + let mut g = m.lock(); + let _t = thread::spawn(move || { + let &(ref lock, ref cvar) = &*pair_copy; + let mut started = lock.lock(); + thread::sleep(Duration::from_millis(1)); + *started = true; + cvar.notify_one(); + }); + + let wait = + c.wait_timeout_while(&mut g, Duration::from_millis(u64::MAX), |&mut notified| !notified); + // ensure it didn't time-out even if we were not given any time. + assert!(!wait.timed_out()); + assert!(*g); +} + +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. 
+fn poison_wait_timeout_wake() { + use std::sync::poison::{Condvar, Mutex}; let m = Arc::new(Mutex::new(())); let c = Arc::new(Condvar::new()); @@ -249,6 +441,46 @@ fn wait_timeout_wake() { } } +#[test] +#[cfg(not(any(target_os = "emscripten", target_os = "wasi")))] // No threads. +fn nonpoison_wait_timeout_wake() { + use std::sync::nonpoison::{Condvar, Mutex}; + + let m = Arc::new(Mutex::new(())); + let c = Arc::new(Condvar::new()); + + loop { + let mut g = m.lock(); + + let c2 = c.clone(); + let m2 = m.clone(); + + let notified = Arc::new(AtomicBool::new(false)); + let notified_copy = notified.clone(); + + let t = thread::spawn(move || { + let _g = m2.lock(); + thread::sleep(Duration::from_millis(1)); + notified_copy.store(true, Ordering::Relaxed); + c2.notify_one(); + }); + + let timeout_res = c.wait_timeout(&mut g, Duration::from_millis(u64::MAX)); + assert!(!timeout_res.timed_out()); + // spurious wakeups mean this isn't necessarily true + // so execute test again, if not notified + if !notified.load(Ordering::Relaxed) { + t.join().unwrap(); + continue; + } + drop(g); + + t.join().unwrap(); + + break; + } +} + // Some platforms internally cast the timeout duration into nanoseconds. // If they fail to consider overflow during the conversion (I'm looking // at you, macOS), `wait_timeout` will return immediately and indicate a @@ -256,8 +488,8 @@ fn wait_timeout_wake() { // `std` should guard against this by clamping the timeout. // See #37440 for context. #[test] -fn timeout_nanoseconds() { - use std::sync::{Condvar, Mutex}; +fn poison_timeout_nanoseconds() { + use std::sync::poison::{Condvar, Mutex}; let sent = Mutex::new(false); let cond = Condvar::new(); @@ -293,3 +525,40 @@ fn timeout_nanoseconds() { } }) } + +#[test] +fn nonpoison_timeout_nanoseconds() { + use std::sync::nonpoison::{Condvar, Mutex}; + + let sent = Mutex::new(false); + let cond = Condvar::new(); + + thread::scope(|s| { + s.spawn(|| { + // Sleep so that the other thread has a chance to encounter the + // timeout. + thread::sleep(Duration::from_secs(2)); + sent.set(true); + cond.notify_all(); + }); + + let mut guard = sent.lock(); + // Loop until `sent` is set by the thread to guard against spurious + // wakeups. If the `wait_timeout` happens just before the signal by + // the other thread, such a spurious wakeup might prevent the + // miscalculated timeout from occurring, but this is basically just + // a smoke test anyway. + loop { + if *guard { + break; + } + + // If there is internal overflow, this call will return almost + // immediately, before the other thread has reached the `notify_all`, + // and indicate a timeout. 
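+            // (`u64::MAX.div_ceil(1_000_000_000)` seconds is at least
+            // `u64::MAX` nanoseconds, which is exactly what overflows a naive
+            // nanosecond conversion.)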
+ let res = cond + .wait_timeout(&mut guard, Duration::from_secs(u64::MAX.div_ceil(1_000_000_000))); + assert!(!res.timed_out()); + } + }) +} From d359ac1cc6904e67e0b20ffa68591a3712b4f46d Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Fri, 31 Oct 2025 15:14:21 -0400 Subject: [PATCH 252/358] move condvar test from mutex to condvar test file Signed-off-by: Connor Tsui --- std/tests/sync/condvar.rs | 34 ++++++++++++++++++++++++++++++++++ std/tests/sync/mutex.rs | 34 +--------------------------------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/std/tests/sync/condvar.rs b/std/tests/sync/condvar.rs index a52e0a00caf48..e5a7ad8f9b331 100644 --- a/std/tests/sync/condvar.rs +++ b/std/tests/sync/condvar.rs @@ -562,3 +562,37 @@ fn nonpoison_timeout_nanoseconds() { } }) } + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")] +fn test_arc_condvar_poison() { + use std::sync::poison::{Condvar, Mutex}; + + struct Packet(Arc<(Mutex, Condvar)>); + + let packet = Packet(Arc::new((Mutex::new(1), Condvar::new()))); + let packet2 = Packet(packet.0.clone()); + let (tx, rx) = channel(); + + let _t = thread::spawn(move || -> () { + rx.recv().unwrap(); + let &(ref lock, ref cvar) = &*packet2.0; + let _g = lock.lock().unwrap(); + cvar.notify_one(); + // Parent should fail when it wakes up. + panic!(); + }); + + let &(ref lock, ref cvar) = &*packet.0; + let mut lock = lock.lock().unwrap(); + tx.send(()).unwrap(); + while *lock == 1 { + match cvar.wait(lock) { + Ok(l) => { + lock = l; + assert_eq!(*lock, 1); + } + Err(..) => break, + } + } +} diff --git a/std/tests/sync/mutex.rs b/std/tests/sync/mutex.rs index ff6aef717936f..75a6bf64607ef 100644 --- a/std/tests/sync/mutex.rs +++ b/std/tests/sync/mutex.rs @@ -3,7 +3,7 @@ use std::ops::FnMut; use std::panic::{self, AssertUnwindSafe}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::mpsc::channel; -use std::sync::{Arc, Condvar, MappedMutexGuard, Mutex, MutexGuard, TryLockError}; +use std::sync::{Arc, MappedMutexGuard, Mutex, MutexGuard, TryLockError}; use std::{hint, mem, thread}; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -423,38 +423,6 @@ fn test_replace_poison() { inner(|| NonCopyNeedsDrop(10), || NonCopyNeedsDrop(20)); } -#[test] -#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")] -fn test_arc_condvar_poison() { - struct Packet(Arc<(Mutex, Condvar)>); - - let packet = Packet(Arc::new((Mutex::new(1), Condvar::new()))); - let packet2 = Packet(packet.0.clone()); - let (tx, rx) = channel(); - - let _t = thread::spawn(move || -> () { - rx.recv().unwrap(); - let &(ref lock, ref cvar) = &*packet2.0; - let _g = lock.lock().unwrap(); - cvar.notify_one(); - // Parent should fail when it wakes up. - panic!(); - }); - - let &(ref lock, ref cvar) = &*packet.0; - let mut lock = lock.lock().unwrap(); - tx.send(()).unwrap(); - while *lock == 1 { - match cvar.wait(lock) { - Ok(l) => { - lock = l; - assert_eq!(*lock, 1); - } - Err(..) 
=> break, - } - } -} - #[test] #[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")] fn test_mutex_arc_poison() { From e00d5f62b575eb89cf549e5b2572575ebd99e81e Mon Sep 17 00:00:00 2001 From: Antoni Spaanderman <56turtle56@gmail.com> Date: Tue, 30 Sep 2025 22:35:30 +0200 Subject: [PATCH 253/358] implement VecDeque extend_front and prepend, add tests --- alloc/src/collections/vec_deque/mod.rs | 84 +++++++++++++++- .../src/collections/vec_deque/spec_extend.rs | 95 +++++++++++++++++++ alloc/src/lib.rs | 1 + alloctests/tests/vec_deque.rs | 43 +++++++++ 4 files changed, 222 insertions(+), 1 deletion(-) diff --git a/alloc/src/collections/vec_deque/mod.rs b/alloc/src/collections/vec_deque/mod.rs index dc5aaa8726032..fc0220022dbb5 100644 --- a/alloc/src/collections/vec_deque/mod.rs +++ b/alloc/src/collections/vec_deque/mod.rs @@ -52,7 +52,7 @@ pub use self::iter::Iter; mod iter; -use self::spec_extend::SpecExtend; +use self::spec_extend::{SpecExtend, SpecExtendFront}; mod spec_extend; @@ -179,6 +179,21 @@ impl VecDeque { self.len += 1; } + /// Prepends an element to the buffer. + /// + /// # Safety + /// + /// May only be called if `deque.len() < deque.capacity()` + #[inline] + unsafe fn push_front_unchecked(&mut self, element: T) { + self.head = self.wrap_sub(self.head, 1); + // SAFETY: Because of the precondition, it's guaranteed that there is space + // in the logical array before the first element (where self.head is now). + unsafe { self.buffer_write(self.head, element) }; + // This can't overflow because `deque.len() < deque.capacity() <= usize::MAX`. + self.len += 1; + } + /// Moves an element out of the buffer #[inline] unsafe fn buffer_read(&mut self, off: usize) -> T { @@ -2122,6 +2137,73 @@ impl VecDeque { unsafe { self.buffer_write(self.to_physical_idx(len), value) } } + /// Prepends all contents of the iterator to the front of the deque. + /// The order of the contents is preserved. + /// + /// To get behavior like [`append`][VecDeque::append] where elements are moved + /// from the other collection to this one, use `self.prepend(other.drain(..))`. + /// + /// # Examples + /// + /// ``` + /// #![feature(deque_extend_front)] + /// use std::collections::VecDeque; + /// + /// let mut deque = VecDeque::from([4, 5, 6]); + /// deque.prepend([1, 2, 3]); + /// assert_eq!(deque, [1, 2, 3, 4, 5, 6]); + /// ``` + /// + /// Move values between collections like [`append`][VecDeque::append] does but prepend to the front: + /// + /// ``` + /// #![feature(deque_extend_front)] + /// use std::collections::VecDeque; + /// + /// let mut deque1 = VecDeque::from([4, 5, 6]); + /// let mut deque2 = VecDeque::from([1, 2, 3]); + /// deque1.prepend(deque2.drain(..)); + /// assert_eq!(deque1, [1, 2, 3, 4, 5, 6]); + /// assert!(deque2.is_empty()); + /// ``` + #[unstable(feature = "deque_extend_front", issue = "146975")] + #[track_caller] + pub fn prepend>(&mut self, other: I) { + self.extend_front(other.into_iter().rev()) + } + + /// Prepends all contents of the iterator to the front of the deque, + /// as if [`push_front`][VecDeque::push_front] was called repeatedly with + /// the values yielded by the iterator. 
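+    /// As a consequence, the items end up at the front of the deque in the
+    /// reverse of the order in which the iterator yields them.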
+ /// + /// # Examples + /// + /// ``` + /// #![feature(deque_extend_front)] + /// use std::collections::VecDeque; + /// + /// let mut deque = VecDeque::from([4, 5, 6]); + /// deque.extend_front([3, 2, 1]); + /// assert_eq!(deque, [1, 2, 3, 4, 5, 6]); + /// ``` + /// + /// This behaves like [`push_front`][VecDeque::push_front] was called repeatedly: + /// + /// ``` + /// use std::collections::VecDeque; + /// + /// let mut deque = VecDeque::from([4, 5, 6]); + /// for v in [3, 2, 1] { + /// deque.push_front(v); + /// } + /// assert_eq!(deque, [1, 2, 3, 4, 5, 6]); + /// ``` + #[unstable(feature = "deque_extend_front", issue = "146975")] + #[track_caller] + pub fn extend_front>(&mut self, iter: I) { + >::spec_extend_front(self, iter.into_iter()); + } + #[inline] fn is_contiguous(&self) -> bool { // Do the calculation like this to avoid overflowing if len + head > usize::MAX diff --git a/alloc/src/collections/vec_deque/spec_extend.rs b/alloc/src/collections/vec_deque/spec_extend.rs index 6c2199135e08a..3e830d2afe676 100644 --- a/alloc/src/collections/vec_deque/spec_extend.rs +++ b/alloc/src/collections/vec_deque/spec_extend.rs @@ -1,3 +1,5 @@ +#[cfg(not(test))] +use core::iter::Rev; use core::iter::TrustedLen; use core::slice; @@ -114,3 +116,96 @@ where } } } + +// Specialization trait used for VecDeque::extend_front +pub(super) trait SpecExtendFront { + #[track_caller] + fn spec_extend_front(&mut self, iter: I); +} + +impl SpecExtendFront for VecDeque +where + I: Iterator, +{ + #[track_caller] + default fn spec_extend_front(&mut self, mut iter: I) { + // This function should be the moral equivalent of: + // + // for item in iter { + // self.push_front(item); + // } + + while let Some(element) = iter.next() { + let (lower, _) = iter.size_hint(); + self.reserve(lower.saturating_add(1)); + + // SAFETY: We just reserved space for at least one element. + unsafe { self.push_front_unchecked(element) }; + + // Inner loop to avoid repeatedly calling `reserve`. + while self.len < self.capacity() { + let Some(element) = iter.next() else { + return; + }; + // SAFETY: The loop condition guarantees that `self.len() < self.capacity()`. + unsafe { self.push_front_unchecked(element) }; + } + } + } +} + +#[cfg(not(test))] +impl SpecExtendFront> for VecDeque { + #[track_caller] + fn spec_extend_front(&mut self, mut iterator: vec::IntoIter) { + let slice = iterator.as_mut_slice(); + slice.reverse(); + unsafe { prepend(self, slice) }; + iterator.forget_remaining_elements(); + } +} + +#[cfg(not(test))] +impl SpecExtendFront>> for VecDeque { + #[track_caller] + fn spec_extend_front(&mut self, iterator: Rev>) { + let mut iterator = iterator.into_inner(); + unsafe { prepend(self, iterator.as_slice()) }; + iterator.forget_remaining_elements(); + } +} + +// impl SpecExtendFront>> for VecDeque +// where +// T: Copy, +// { +// #[track_caller] +// fn spec_extend_front(&mut self, _iter: Copied>) { +// // unsafe { prepend(self, slice) }; +// // reverse in place? +// } +// } + +// impl SpecExtendFront>>> for VecDeque +// where +// T: Copy, +// { +// #[track_caller] +// fn spec_extend_front(&mut self, iter: Rev>>) { +// unsafe { prepend(self, iter.into_inner().it.as_slice()) }; +// } +// } + +/// # Safety +/// +/// `slice` will be copied into the deque, make sure to forget the items if `T` is not `Copy`. 
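+/// Otherwise each item would have two owners and eventually be dropped twice.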
+#[cfg(not(test))] +unsafe fn prepend(deque: &mut VecDeque, slice: &[T]) { + deque.reserve(slice.len()); + + unsafe { + deque.head = deque.wrap_sub(deque.head, slice.len()); + deque.copy_slice(deque.head, slice); + deque.len += slice.len(); + } +} diff --git a/alloc/src/lib.rs b/alloc/src/lib.rs index 786f88c29ef46..81feb1cfc3f27 100644 --- a/alloc/src/lib.rs +++ b/alloc/src/lib.rs @@ -134,6 +134,7 @@ #![feature(ptr_alignment_type)] #![feature(ptr_internals)] #![feature(ptr_metadata)] +#![feature(rev_into_inner)] #![feature(set_ptr_value)] #![feature(sized_type_properties)] #![feature(slice_from_ptr_range)] diff --git a/alloctests/tests/vec_deque.rs b/alloctests/tests/vec_deque.rs index 0a4a0e0cac4d7..6442ee536fb57 100644 --- a/alloctests/tests/vec_deque.rs +++ b/alloctests/tests/vec_deque.rs @@ -2081,3 +2081,46 @@ fn test_extend_and_prepend_from_within() { v.extend_from_within(..); assert_eq!(v.iter().map(|s| &**s).collect::(), "123123123123"); } + +#[test] +fn test_extend_front() { + let mut v = VecDeque::new(); + v.extend_front(0..3); + assert_eq!(v, [2, 1, 0]); + v.extend_front(3..6); + assert_eq!(v, [5, 4, 3, 2, 1, 0]); + v.prepend([1; 4]); + assert_eq!(v, [1, 1, 1, 1, 5, 4, 3, 2, 1, 0]); + + let mut v = VecDeque::with_capacity(8); + let cap = v.capacity(); + v.extend(0..4); + v.truncate_front(2); + v.extend_front(4..8); + assert_eq!(v.as_slices(), ([7, 6].as_slice(), [5, 4, 2, 3].as_slice())); + assert_eq!(v.capacity(), cap); + + let mut v = VecDeque::new(); + v.extend_front([]); + v.extend_front(None); + v.extend_front(vec![]); + v.prepend([]); + v.prepend(None); + v.prepend(vec![]); + assert_eq!(v.capacity(), 0); + v.extend_front(Some(123)); + assert_eq!(v, [123]); +} + +#[test] +fn test_extend_front_specialization() { + let mut v = VecDeque::with_capacity(4); + v.prepend(vec![1, 2, 3]); + assert_eq!(v, [1, 2, 3]); + v.pop_front(); + v.prepend((-4..2).collect::>()); + assert_eq!(v, (-4..=3).collect::>()); + v.clear(); + v.extend_front(vec![1, 2, 3]); + assert_eq!(v, [3, 2, 1]); +} From fc96613347b17663c0f7471fca02691bd148851e Mon Sep 17 00:00:00 2001 From: Ayush Singh Date: Sun, 7 Sep 2025 22:05:59 +0530 Subject: [PATCH 254/358] library: std: sys: net: uefi: tcp: Implement write_vectored - A working vectored write implementation for TCP4. - Also introduces a small helper UefiBox intended to be used with heap allocated UEFI DSTs. - Tested on OVMF Signed-off-by: Ayush Singh --- std/src/sys/net/connection/uefi/mod.rs | 5 +- std/src/sys/net/connection/uefi/tcp.rs | 12 ++++- std/src/sys/net/connection/uefi/tcp4.rs | 69 ++++++++++++++++++++----- std/src/sys/pal/uefi/helpers.rs | 37 +++++++++++++ 4 files changed, 107 insertions(+), 16 deletions(-) diff --git a/std/src/sys/net/connection/uefi/mod.rs b/std/src/sys/net/connection/uefi/mod.rs index 004f6d413a1f3..d76e3e576f330 100644 --- a/std/src/sys/net/connection/uefi/mod.rs +++ b/std/src/sys/net/connection/uefi/mod.rs @@ -82,12 +82,11 @@ impl TcpStream { } pub fn write_vectored(&self, buf: &[IoSlice<'_>]) -> io::Result { - // FIXME: UEFI does support vectored write, so implement that. - crate::io::default_write_vectored(|b| self.write(b), buf) + self.inner.write_vectored(buf, self.write_timeout()?) 
} pub fn is_write_vectored(&self) -> bool { - false + true } pub fn peer_addr(&self) -> io::Result { diff --git a/std/src/sys/net/connection/uefi/tcp.rs b/std/src/sys/net/connection/uefi/tcp.rs index aac97007bbfe5..16283e64fb35a 100644 --- a/std/src/sys/net/connection/uefi/tcp.rs +++ b/std/src/sys/net/connection/uefi/tcp.rs @@ -1,5 +1,5 @@ use super::tcp4; -use crate::io; +use crate::io::{self, IoSlice}; use crate::net::SocketAddr; use crate::ptr::NonNull; use crate::sys::{helpers, unsupported}; @@ -28,6 +28,16 @@ impl Tcp { } } + pub(crate) fn write_vectored( + &self, + buf: &[IoSlice<'_>], + timeout: Option, + ) -> io::Result { + match self { + Self::V4(client) => client.write_vectored(buf, timeout), + } + } + pub(crate) fn read(&self, buf: &mut [u8], timeout: Option) -> io::Result { match self { Self::V4(client) => client.read(buf, timeout), diff --git a/std/src/sys/net/connection/uefi/tcp4.rs b/std/src/sys/net/connection/uefi/tcp4.rs index 75862ff247b4f..ba0424454d738 100644 --- a/std/src/sys/net/connection/uefi/tcp4.rs +++ b/std/src/sys/net/connection/uefi/tcp4.rs @@ -1,7 +1,7 @@ use r_efi::efi::{self, Status}; use r_efi::protocols::tcp4; -use crate::io; +use crate::io::{self, IoSlice}; use crate::net::SocketAddrV4; use crate::ptr::NonNull; use crate::sync::atomic::{AtomicBool, Ordering}; @@ -108,11 +108,7 @@ impl Tcp4 { } pub(crate) fn write(&self, buf: &[u8], timeout: Option) -> io::Result { - let evt = unsafe { self.create_evt() }?; - let completion_token = - tcp4::CompletionToken { event: evt.as_ptr(), status: Status::SUCCESS }; let data_len = u32::try_from(buf.len()).unwrap_or(u32::MAX); - let fragment = tcp4::FragmentData { fragment_length: data_len, fragment_buffer: buf.as_ptr().cast::().cast_mut(), @@ -125,14 +121,63 @@ impl Tcp4 { fragment_table: [fragment], }; - let protocol = self.protocol.as_ptr(); - let mut token = tcp4::IoToken { - completion_token, - packet: tcp4::IoTokenPacket { - tx_data: (&raw mut tx_data).cast::>(), - }, + self.write_inner((&raw mut tx_data).cast(), timeout).map(|_| data_len as usize) + } + + pub(crate) fn write_vectored( + &self, + buf: &[IoSlice<'_>], + timeout: Option, + ) -> io::Result { + let mut data_length = 0u32; + let mut fragment_count = 0u32; + + // Calculate how many IoSlice in buf can be transmitted. + for i in buf { + // IoSlice length is always <= u32::MAX in UEFI. + match data_length + .checked_add(u32::try_from(i.as_slice().len()).expect("value is stored as a u32")) + { + Some(x) => data_length = x, + None => break, + } + fragment_count += 1; + } + + let tx_data_size = size_of::>() + + size_of::() * (fragment_count as usize); + let mut tx_data = helpers::UefiBox::::new(tx_data_size)?; + tx_data.write(tcp4::TransmitData { + push: r_efi::efi::Boolean::FALSE, + urgent: r_efi::efi::Boolean::FALSE, + data_length, + fragment_count, + fragment_table: [], + }); + unsafe { + // SAFETY: IoSlice and FragmentData are guaranteed to have same layout. 
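+            // The copy is in bounds: `tx_data` was allocated above with room
+            // for `fragment_count` trailing `FragmentData` entries, and
+            // `fragment_count <= buf.len()`.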
+            crate::ptr::copy_nonoverlapping(
+                buf.as_ptr().cast(),
+                (*tx_data.as_mut_ptr()).fragment_table.as_mut_ptr(),
+                fragment_count as usize,
+            );
+        };
+
+        self.write_inner(tx_data.as_mut_ptr(), timeout).map(|_| data_length as usize)
+    }
+
+    fn write_inner(
+        &self,
+        tx_data: *mut tcp4::TransmitData,
+        timeout: Option,
+    ) -> io::Result<()> {
+        let evt = unsafe { self.create_evt() }?;
+        let completion_token =
+            tcp4::CompletionToken { event: evt.as_ptr(), status: Status::SUCCESS };
+
+        let protocol = self.protocol.as_ptr();
+        let mut token = tcp4::IoToken { completion_token, packet: tcp4::IoTokenPacket { tx_data } };
+
         let r = unsafe { ((*protocol).transmit)(protocol, &mut token) };
         if r.is_error() {
             return Err(io::Error::from_raw_os_error(r.as_usize()));
@@ -143,7 +188,7 @@
         if completion_token.status.is_error() {
             Err(io::Error::from_raw_os_error(completion_token.status.as_usize()))
         } else {
-            Ok(data_len as usize)
+            Ok(())
         }
     }
 
diff --git a/std/src/sys/pal/uefi/helpers.rs b/std/src/sys/pal/uefi/helpers.rs
index c0d69c3e0029a..852e0d6b051bd 100644
--- a/std/src/sys/pal/uefi/helpers.rs
+++ b/std/src/sys/pal/uefi/helpers.rs
@@ -12,6 +12,7 @@ use r_efi::efi::{self, Guid};
 use r_efi::protocols::{device_path, device_path_to_text, service_binding, shell};
 
+use crate::alloc::Layout;
 use crate::ffi::{OsStr, OsString};
 use crate::io::{self, const_error};
 use crate::marker::PhantomData;
@@ -769,3 +770,39 @@ pub(crate) const fn ipv4_to_r_efi(addr: crate::net::Ipv4Addr) -> efi::Ipv4Addres
 pub(crate) const fn ipv4_from_r_efi(ip: efi::Ipv4Address) -> crate::net::Ipv4Addr {
     crate::net::Ipv4Addr::new(ip.addr[0], ip.addr[1], ip.addr[2], ip.addr[3])
 }
+
+/// This type is intended for use with DSTs. Since such types are unsized, a reference to such types
+/// is not valid in Rust. Thus, only pointers should be used when interacting with such types.
+pub(crate) struct UefiBox {
+    inner: NonNull,
+    size: usize,
+}
+
+impl UefiBox {
+    pub(crate) fn new(len: usize) -> io::Result {
+        assert!(len >= size_of::());
+        // UEFI always expects types to be 8 byte aligned.
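+        // `from_size_align` only fails for a non-power-of-two alignment or a
+        // size that overflows `isize::MAX` when rounded up, so this `unwrap`
+        // cannot trip for any allocation the firmware could actually satisfy.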
+ let layout = Layout::from_size_align(len, 8).unwrap(); + let ptr = unsafe { crate::alloc::alloc(layout) }; + + match NonNull::new(ptr.cast()) { + Some(inner) => Ok(Self { inner, size: len }), + None => Err(io::Error::new(io::ErrorKind::OutOfMemory, "Allocation failed")), + } + } + + pub(crate) fn write(&mut self, data: T) { + unsafe { self.inner.write(data) } + } + + pub(crate) fn as_mut_ptr(&mut self) -> *mut T { + self.inner.as_ptr().cast() + } +} + +impl Drop for UefiBox { + fn drop(&mut self) { + let layout = Layout::from_size_align(self.size, 8).unwrap(); + unsafe { crate::alloc::dealloc(self.inner.as_ptr().cast(), layout) }; + } +} From b83348afeca38d0bc61d93ab2ac34b63b81454a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ila=C3=AF=20Deutel?= <10098207+ilai-deutel@users.noreply.github.com> Date: Sat, 1 Nov 2025 23:46:35 -0400 Subject: [PATCH 255/358] Fix documentation for std::panic::update_hook * `set_hook` expects a boxed function * Missing closing delimiter for the closure --- std/src/panicking.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/std/src/panicking.rs b/std/src/panicking.rs index b7be869c4eb48..9af3e5f63ffb4 100644 --- a/std/src/panicking.rs +++ b/std/src/panicking.rs @@ -215,10 +215,10 @@ pub fn take_hook() -> Box) + 'static + Sync + Send> { /// /// // Equivalent to /// // let prev = panic::take_hook(); -/// // panic::set_hook(move |info| { +/// // panic::set_hook(Box::new(move |info| { /// // println!("..."); /// // prev(info); -/// // ); +/// // })); /// panic::update_hook(move |prev, info| { /// println!("Print custom message and execute panic handler as usual"); /// prev(info); From 515af1ad1f428eff162adeb09363bb618a7e8c3b Mon Sep 17 00:00:00 2001 From: Jieyou Xu Date: Sun, 2 Nov 2025 17:29:24 +0800 Subject: [PATCH 256/358] TypeId: make unstable layout/size explicit Or worded differently, explicitly remark non-stable-guarantee of `TypeId` layout and size. --- core/src/any.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/core/src/any.rs b/core/src/any.rs index 3ab95438c3ff3..655ec4dff309a 100644 --- a/core/src/any.rs +++ b/core/src/any.rs @@ -611,6 +611,15 @@ impl dyn Any + Send + Sync { /// noting that the hashes and ordering will vary between Rust releases. Beware /// of relying on them inside of your code! /// +/// # Layout +/// +/// Like other [`Rust`-representation][repr-rust] types, `TypeId`'s size and layout are unstable. +/// In particular, this means that you cannot rely on the size and layout of `TypeId` remaining the +/// same between Rust releases; they are subject to change without prior notice between Rust +/// releases. +/// +/// [repr-rust]: https://doc.rust-lang.org/reference/type-layout.html#r-layout.repr.rust.unspecified +/// /// # Danger of Improper Variance /// /// You might think that subtyping is impossible between two static types, From 196a5faa4177f076c016333133cf78148992b3f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Sun, 2 Nov 2025 11:03:31 +0100 Subject: [PATCH 257/358] Generalize branch references to HEAD --- core/src/intrinsics/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/intrinsics/mod.rs b/core/src/intrinsics/mod.rs index 4cee77fda4fba..5ba2d92a4596f 100644 --- a/core/src/intrinsics/mod.rs +++ b/core/src/intrinsics/mod.rs @@ -5,16 +5,16 @@ //! intrinsics via stable wrapper functions. Use these instead. //! //! These are the imports making intrinsics available to Rust code. The actual implementations live in the compiler. -//! 
Some of these intrinsics are lowered to MIR in . -//! The remaining intrinsics are implemented for the LLVM backend in -//! and , -//! and for const evaluation in . +//! Some of these intrinsics are lowered to MIR in . +//! The remaining intrinsics are implemented for the LLVM backend in +//! and , +//! and for const evaluation in . //! //! # Const intrinsics //! //! In order to make an intrinsic unstable usable at compile-time, copy the implementation from //! to -//! +//! //! and make the intrinsic declaration below a `const fn`. This should be done in coordination with //! wg-const-eval. //! From 38cf54cabcae09bf73d7fed22bb2f3d5d532bb29 Mon Sep 17 00:00:00 2001 From: nxsaken Date: Sun, 2 Nov 2025 16:34:36 +0400 Subject: [PATCH 258/358] Rename {i,u}N::*exact_div to *div_exact --- core/src/num/int_macros.rs | 30 +++++++++--------- core/src/num/uint_macros.rs | 24 +++++++------- coretests/tests/num/int_macros.rs | 50 +++++++++++++++--------------- coretests/tests/num/uint_macros.rs | 28 ++++++++--------- 4 files changed, 66 insertions(+), 66 deletions(-) diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index 16f85c71403ab..9086664b473e2 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -992,10 +992,10 @@ macro_rules! int_impl { /// /// ``` /// #![feature(exact_div)] - #[doc = concat!("assert_eq!((", stringify!($SelfT), "::MIN + 1).checked_exact_div(-1), Some(", stringify!($Max), "));")] - #[doc = concat!("assert_eq!((-5", stringify!($SelfT), ").checked_exact_div(2), None);")] - #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MIN.checked_exact_div(-1), None);")] - #[doc = concat!("assert_eq!((1", stringify!($SelfT), ").checked_exact_div(0), None);")] + #[doc = concat!("assert_eq!((", stringify!($SelfT), "::MIN + 1).checked_div_exact(-1), Some(", stringify!($Max), "));")] + #[doc = concat!("assert_eq!((-5", stringify!($SelfT), ").checked_div_exact(2), None);")] + #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MIN.checked_div_exact(-1), None);")] + #[doc = concat!("assert_eq!((1", stringify!($SelfT), ").checked_div_exact(0), None);")] /// ``` #[unstable( feature = "exact_div", @@ -1004,7 +1004,7 @@ macro_rules! int_impl { #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const fn checked_exact_div(self, rhs: Self) -> Option { + pub const fn checked_div_exact(self, rhs: Self) -> Option { if intrinsics::unlikely(rhs == 0 || ((self == Self::MIN) && (rhs == -1))) { None } else { @@ -1034,18 +1034,18 @@ macro_rules! 
int_impl { /// /// ``` /// #![feature(exact_div)] - #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(2), Some(32));")] - #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(32), Some(2));")] - #[doc = concat!("assert_eq!((", stringify!($SelfT), "::MIN + 1).exact_div(-1), Some(", stringify!($Max), "));")] - #[doc = concat!("assert_eq!(65", stringify!($SelfT), ".exact_div(2), None);")] + #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".div_exact(2), Some(32));")] + #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".div_exact(32), Some(2));")] + #[doc = concat!("assert_eq!((", stringify!($SelfT), "::MIN + 1).div_exact(-1), Some(", stringify!($Max), "));")] + #[doc = concat!("assert_eq!(65", stringify!($SelfT), ".div_exact(2), None);")] /// ``` /// ```should_panic /// #![feature(exact_div)] - #[doc = concat!("let _ = 64", stringify!($SelfT),".exact_div(0);")] + #[doc = concat!("let _ = 64", stringify!($SelfT),".div_exact(0);")] /// ``` /// ```should_panic /// #![feature(exact_div)] - #[doc = concat!("let _ = ", stringify!($SelfT), "::MIN.exact_div(-1);")] + #[doc = concat!("let _ = ", stringify!($SelfT), "::MIN.div_exact(-1);")] /// ``` #[unstable( feature = "exact_div", @@ -1055,7 +1055,7 @@ macro_rules! int_impl { without modifying the original"] #[inline] #[rustc_inherit_overflow_checks] - pub const fn exact_div(self, rhs: Self) -> Option { + pub const fn div_exact(self, rhs: Self) -> Option { if self % rhs != 0 { None } else { @@ -1069,7 +1069,7 @@ macro_rules! int_impl { /// /// This results in undefined behavior when `rhs == 0`, `self % rhs != 0`, or #[doc = concat!("`self == ", stringify!($SelfT), "::MIN && rhs == -1`,")] - /// i.e. when [`checked_exact_div`](Self::checked_exact_div) would return `None`. + /// i.e. when [`checked_div_exact`](Self::checked_div_exact) would return `None`. #[unstable( feature = "exact_div", issue = "139911", @@ -1077,10 +1077,10 @@ macro_rules! int_impl { #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const unsafe fn unchecked_exact_div(self, rhs: Self) -> Self { + pub const unsafe fn unchecked_div_exact(self, rhs: Self) -> Self { assert_unsafe_precondition!( check_language_ub, - concat!(stringify!($SelfT), "::unchecked_exact_div cannot overflow, divide by zero, or leave a remainder"), + concat!(stringify!($SelfT), "::unchecked_div_exact cannot overflow, divide by zero, or leave a remainder"), ( lhs: $SelfT = self, rhs: $SelfT = rhs, diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index 1efc551d670ab..08162abb31153 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -1222,10 +1222,10 @@ macro_rules! 
uint_impl { /// /// ``` /// #![feature(exact_div)] - #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".checked_exact_div(2), Some(32));")] - #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".checked_exact_div(32), Some(2));")] - #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".checked_exact_div(0), None);")] - #[doc = concat!("assert_eq!(65", stringify!($SelfT), ".checked_exact_div(2), None);")] + #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".checked_div_exact(2), Some(32));")] + #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".checked_div_exact(32), Some(2));")] + #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".checked_div_exact(0), None);")] + #[doc = concat!("assert_eq!(65", stringify!($SelfT), ".checked_div_exact(2), None);")] /// ``` #[unstable( feature = "exact_div", @@ -1234,7 +1234,7 @@ macro_rules! uint_impl { #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const fn checked_exact_div(self, rhs: Self) -> Option { + pub const fn checked_div_exact(self, rhs: Self) -> Option { if intrinsics::unlikely(rhs == 0) { None } else { @@ -1259,9 +1259,9 @@ macro_rules! uint_impl { /// /// ``` /// #![feature(exact_div)] - #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(2), Some(32));")] - #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".exact_div(32), Some(2));")] - #[doc = concat!("assert_eq!(65", stringify!($SelfT), ".exact_div(2), None);")] + #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".div_exact(2), Some(32));")] + #[doc = concat!("assert_eq!(64", stringify!($SelfT), ".div_exact(32), Some(2));")] + #[doc = concat!("assert_eq!(65", stringify!($SelfT), ".div_exact(2), None);")] /// ``` #[unstable( feature = "exact_div", @@ -1271,7 +1271,7 @@ macro_rules! uint_impl { without modifying the original"] #[inline] #[rustc_inherit_overflow_checks] - pub const fn exact_div(self, rhs: Self) -> Option { + pub const fn div_exact(self, rhs: Self) -> Option { if self % rhs != 0 { None } else { @@ -1284,7 +1284,7 @@ macro_rules! uint_impl { /// # Safety /// /// This results in undefined behavior when `rhs == 0` or `self % rhs != 0`, - /// i.e. when [`checked_exact_div`](Self::checked_exact_div) would return `None`. + /// i.e. when [`checked_div_exact`](Self::checked_div_exact) would return `None`. #[unstable( feature = "exact_div", issue = "139911", @@ -1292,10 +1292,10 @@ macro_rules! uint_impl { #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const unsafe fn unchecked_exact_div(self, rhs: Self) -> Self { + pub const unsafe fn unchecked_div_exact(self, rhs: Self) -> Self { assert_unsafe_precondition!( check_language_ub, - concat!(stringify!($SelfT), "::unchecked_exact_div divide by zero or leave a remainder"), + concat!(stringify!($SelfT), "::unchecked_div_exact divide by zero or leave a remainder"), ( lhs: $SelfT = self, rhs: $SelfT = rhs, diff --git a/coretests/tests/num/int_macros.rs b/coretests/tests/num/int_macros.rs index e640b7853bd94..37336f49ef1b6 100644 --- a/coretests/tests/num/int_macros.rs +++ b/coretests/tests/num/int_macros.rs @@ -724,42 +724,42 @@ macro_rules! 
int_module { } } - const EXACT_DIV_SUCCESS_DIVIDEND1: $T = 42; - const EXACT_DIV_SUCCESS_DIVISOR1: $T = 6; - const EXACT_DIV_SUCCESS_QUOTIENT1: $T = 7; - const EXACT_DIV_SUCCESS_DIVIDEND2: $T = 18; - const EXACT_DIV_SUCCESS_DIVISOR2: $T = 3; - const EXACT_DIV_SUCCESS_QUOTIENT2: $T = 6; - const EXACT_DIV_SUCCESS_DIVIDEND3: $T = -91; - const EXACT_DIV_SUCCESS_DIVISOR3: $T = 13; - const EXACT_DIV_SUCCESS_QUOTIENT3: $T = -7; - const EXACT_DIV_SUCCESS_DIVIDEND4: $T = -57; - const EXACT_DIV_SUCCESS_DIVISOR4: $T = -3; - const EXACT_DIV_SUCCESS_QUOTIENT4: $T = 19; + const DIV_EXACT_SUCCESS_DIVIDEND1: $T = 42; + const DIV_EXACT_SUCCESS_DIVISOR1: $T = 6; + const DIV_EXACT_SUCCESS_QUOTIENT1: $T = 7; + const DIV_EXACT_SUCCESS_DIVIDEND2: $T = 18; + const DIV_EXACT_SUCCESS_DIVISOR2: $T = 3; + const DIV_EXACT_SUCCESS_QUOTIENT2: $T = 6; + const DIV_EXACT_SUCCESS_DIVIDEND3: $T = -91; + const DIV_EXACT_SUCCESS_DIVISOR3: $T = 13; + const DIV_EXACT_SUCCESS_QUOTIENT3: $T = -7; + const DIV_EXACT_SUCCESS_DIVIDEND4: $T = -57; + const DIV_EXACT_SUCCESS_DIVISOR4: $T = -3; + const DIV_EXACT_SUCCESS_QUOTIENT4: $T = 19; test_runtime_and_compiletime! { - fn test_exact_div() { + fn test_div_exact() { // 42 / 6 - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND1, EXACT_DIV_SUCCESS_DIVISOR1), Some(EXACT_DIV_SUCCESS_QUOTIENT1)); - assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND1, EXACT_DIV_SUCCESS_DIVISOR1), Some(EXACT_DIV_SUCCESS_QUOTIENT1)); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(DIV_EXACT_SUCCESS_DIVIDEND1, DIV_EXACT_SUCCESS_DIVISOR1), Some(DIV_EXACT_SUCCESS_QUOTIENT1)); + assert_eq_const_safe!(Option<$T>: <$T>::div_exact(DIV_EXACT_SUCCESS_DIVIDEND1, DIV_EXACT_SUCCESS_DIVISOR1), Some(DIV_EXACT_SUCCESS_QUOTIENT1)); // 18 / 3 - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND2, EXACT_DIV_SUCCESS_DIVISOR2), Some(EXACT_DIV_SUCCESS_QUOTIENT2)); - assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND2, EXACT_DIV_SUCCESS_DIVISOR2), Some(EXACT_DIV_SUCCESS_QUOTIENT2)); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(DIV_EXACT_SUCCESS_DIVIDEND2, DIV_EXACT_SUCCESS_DIVISOR2), Some(DIV_EXACT_SUCCESS_QUOTIENT2)); + assert_eq_const_safe!(Option<$T>: <$T>::div_exact(DIV_EXACT_SUCCESS_DIVIDEND2, DIV_EXACT_SUCCESS_DIVISOR2), Some(DIV_EXACT_SUCCESS_QUOTIENT2)); // -91 / 13 - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND3, EXACT_DIV_SUCCESS_DIVISOR3), Some(EXACT_DIV_SUCCESS_QUOTIENT3)); - assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND3, EXACT_DIV_SUCCESS_DIVISOR3), Some(EXACT_DIV_SUCCESS_QUOTIENT3)); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(DIV_EXACT_SUCCESS_DIVIDEND3, DIV_EXACT_SUCCESS_DIVISOR3), Some(DIV_EXACT_SUCCESS_QUOTIENT3)); + assert_eq_const_safe!(Option<$T>: <$T>::div_exact(DIV_EXACT_SUCCESS_DIVIDEND3, DIV_EXACT_SUCCESS_DIVISOR3), Some(DIV_EXACT_SUCCESS_QUOTIENT3)); // -57 / -3 - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND4, EXACT_DIV_SUCCESS_DIVISOR4), Some(EXACT_DIV_SUCCESS_QUOTIENT4)); - assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND4, EXACT_DIV_SUCCESS_DIVISOR4), Some(EXACT_DIV_SUCCESS_QUOTIENT4)); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(DIV_EXACT_SUCCESS_DIVIDEND4, DIV_EXACT_SUCCESS_DIVISOR4), Some(DIV_EXACT_SUCCESS_QUOTIENT4)); + assert_eq_const_safe!(Option<$T>: 
<$T>::div_exact(DIV_EXACT_SUCCESS_DIVIDEND4, DIV_EXACT_SUCCESS_DIVISOR4), Some(DIV_EXACT_SUCCESS_QUOTIENT4)); // failures - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(1, 2), None); - assert_eq_const_safe!(Option<$T>: <$T>::exact_div(1, 2), None); - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(<$T>::MIN, -1), None); - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(0, 0), None); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(1, 2), None); + assert_eq_const_safe!(Option<$T>: <$T>::div_exact(1, 2), None); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(<$T>::MIN, -1), None); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(0, 0), None); } } }; diff --git a/coretests/tests/num/uint_macros.rs b/coretests/tests/num/uint_macros.rs index c1cfc448f14f5..b89a371efcc25 100644 --- a/coretests/tests/num/uint_macros.rs +++ b/coretests/tests/num/uint_macros.rs @@ -595,27 +595,27 @@ macro_rules! uint_module { } } - const EXACT_DIV_SUCCESS_DIVIDEND1: $T = 42; - const EXACT_DIV_SUCCESS_DIVISOR1: $T = 6; - const EXACT_DIV_SUCCESS_QUOTIENT1: $T = 7; - const EXACT_DIV_SUCCESS_DIVIDEND2: $T = 18; - const EXACT_DIV_SUCCESS_DIVISOR2: $T = 3; - const EXACT_DIV_SUCCESS_QUOTIENT2: $T = 6; + const DIV_EXACT_SUCCESS_DIVIDEND1: $T = 42; + const DIV_EXACT_SUCCESS_DIVISOR1: $T = 6; + const DIV_EXACT_SUCCESS_QUOTIENT1: $T = 7; + const DIV_EXACT_SUCCESS_DIVIDEND2: $T = 18; + const DIV_EXACT_SUCCESS_DIVISOR2: $T = 3; + const DIV_EXACT_SUCCESS_QUOTIENT2: $T = 6; test_runtime_and_compiletime! { - fn test_exact_div() { + fn test_div_exact() { // 42 / 6 - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND1, EXACT_DIV_SUCCESS_DIVISOR1), Some(EXACT_DIV_SUCCESS_QUOTIENT1)); - assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND1, EXACT_DIV_SUCCESS_DIVISOR1), Some(EXACT_DIV_SUCCESS_QUOTIENT1)); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(DIV_EXACT_SUCCESS_DIVIDEND1, DIV_EXACT_SUCCESS_DIVISOR1), Some(DIV_EXACT_SUCCESS_QUOTIENT1)); + assert_eq_const_safe!(Option<$T>: <$T>::div_exact(DIV_EXACT_SUCCESS_DIVIDEND1, DIV_EXACT_SUCCESS_DIVISOR1), Some(DIV_EXACT_SUCCESS_QUOTIENT1)); // 18 / 3 - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(EXACT_DIV_SUCCESS_DIVIDEND2, EXACT_DIV_SUCCESS_DIVISOR2), Some(EXACT_DIV_SUCCESS_QUOTIENT2)); - assert_eq_const_safe!(Option<$T>: <$T>::exact_div(EXACT_DIV_SUCCESS_DIVIDEND2, EXACT_DIV_SUCCESS_DIVISOR2), Some(EXACT_DIV_SUCCESS_QUOTIENT2)); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(DIV_EXACT_SUCCESS_DIVIDEND2, DIV_EXACT_SUCCESS_DIVISOR2), Some(DIV_EXACT_SUCCESS_QUOTIENT2)); + assert_eq_const_safe!(Option<$T>: <$T>::div_exact(DIV_EXACT_SUCCESS_DIVIDEND2, DIV_EXACT_SUCCESS_DIVISOR2), Some(DIV_EXACT_SUCCESS_QUOTIENT2)); // failures - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(1, 2), None); - assert_eq_const_safe!(Option<$T>: <$T>::exact_div(1, 2), None); - assert_eq_const_safe!(Option<$T>: <$T>::checked_exact_div(0, 0), None); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(1, 2), None); + assert_eq_const_safe!(Option<$T>: <$T>::div_exact(1, 2), None); + assert_eq_const_safe!(Option<$T>: <$T>::checked_div_exact(0, 0), None); } } }; From f65c62c3f1cd39f4b4c894fa9a23e7baae2d08e2 Mon Sep 17 00:00:00 2001 From: nxsaken Date: Thu, 16 Oct 2025 11:26:31 +0400 Subject: [PATCH 259/358] Rename {u,i}N::*exact_sh{l,r} to *sh{l,r}_exact --- core/src/num/int_macros.rs | 30 +++++++++++++++--------------- 
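As a hedged aside on the `*_div_exact` family renamed by the preceding patch: a minimal standalone sketch of the documented semantics, using a hypothetical `u32` helper rather than the generated macro code.

// Sketch only: mirrors the doctests above; `checked_div_exact_u32` is a
// hypothetical name, not an API introduced by these patches.
fn checked_div_exact_u32(lhs: u32, rhs: u32) -> Option<u32> {
    // `None` on a zero divisor or a nonzero remainder, `Some(quotient)` otherwise.
    if rhs != 0 && lhs % rhs == 0 { Some(lhs / rhs) } else { None }
}

fn main() {
    assert_eq!(checked_div_exact_u32(64, 2), Some(32));
    assert_eq!(checked_div_exact_u32(64, 0), None);
    assert_eq!(checked_div_exact_u32(65, 2), None);
}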
core/src/num/uint_macros.rs | 24 ++++++++++++------------ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index 9086664b473e2..53217fec360ed 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -1431,17 +1431,17 @@ macro_rules! int_impl { /// ``` /// #![feature(exact_bitshifts)] /// - #[doc = concat!("assert_eq!(0x1", stringify!($SelfT), ".exact_shl(4), Some(0x10));")] - #[doc = concat!("assert_eq!(0x1", stringify!($SelfT), ".exact_shl(", stringify!($SelfT), "::BITS - 2), Some(1 << ", stringify!($SelfT), "::BITS - 2));")] - #[doc = concat!("assert_eq!(0x1", stringify!($SelfT), ".exact_shl(", stringify!($SelfT), "::BITS - 1), None);")] - #[doc = concat!("assert_eq!((-0x2", stringify!($SelfT), ").exact_shl(", stringify!($SelfT), "::BITS - 2), Some(-0x2 << ", stringify!($SelfT), "::BITS - 2));")] - #[doc = concat!("assert_eq!((-0x2", stringify!($SelfT), ").exact_shl(", stringify!($SelfT), "::BITS - 1), None);")] + #[doc = concat!("assert_eq!(0x1", stringify!($SelfT), ".shl_exact(4), Some(0x10));")] + #[doc = concat!("assert_eq!(0x1", stringify!($SelfT), ".shl_exact(", stringify!($SelfT), "::BITS - 2), Some(1 << ", stringify!($SelfT), "::BITS - 2));")] + #[doc = concat!("assert_eq!(0x1", stringify!($SelfT), ".shl_exact(", stringify!($SelfT), "::BITS - 1), None);")] + #[doc = concat!("assert_eq!((-0x2", stringify!($SelfT), ").shl_exact(", stringify!($SelfT), "::BITS - 2), Some(-0x2 << ", stringify!($SelfT), "::BITS - 2));")] + #[doc = concat!("assert_eq!((-0x2", stringify!($SelfT), ").shl_exact(", stringify!($SelfT), "::BITS - 1), None);")] /// ``` #[unstable(feature = "exact_bitshifts", issue = "144336")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const fn exact_shl(self, rhs: u32) -> Option<$SelfT> { + pub const fn shl_exact(self, rhs: u32) -> Option<$SelfT> { if rhs < self.leading_zeros() || rhs < self.leading_ones() { // SAFETY: rhs is checked above Some(unsafe { self.unchecked_shl(rhs) }) @@ -1458,16 +1458,16 @@ macro_rules! int_impl { /// /// This results in undefined behavior when `rhs >= self.leading_zeros() && rhs >= /// self.leading_ones()` i.e. when - #[doc = concat!("[`", stringify!($SelfT), "::exact_shl`]")] + #[doc = concat!("[`", stringify!($SelfT), "::shl_exact`]")] /// would return `None`. #[unstable(feature = "exact_bitshifts", issue = "144336")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const unsafe fn unchecked_exact_shl(self, rhs: u32) -> $SelfT { + pub const unsafe fn unchecked_shl_exact(self, rhs: u32) -> $SelfT { assert_unsafe_precondition!( check_library_ub, - concat!(stringify!($SelfT), "::unchecked_exact_shl cannot shift out bits that would change the value of the first bit"), + concat!(stringify!($SelfT), "::unchecked_shl_exact cannot shift out bits that would change the value of the first bit"), ( zeros: u32 = self.leading_zeros(), ones: u32 = self.leading_ones(), @@ -1611,14 +1611,14 @@ macro_rules! 
int_impl { /// ``` /// #![feature(exact_bitshifts)] /// - #[doc = concat!("assert_eq!(0x10", stringify!($SelfT), ".exact_shr(4), Some(0x1));")] - #[doc = concat!("assert_eq!(0x10", stringify!($SelfT), ".exact_shr(5), None);")] + #[doc = concat!("assert_eq!(0x10", stringify!($SelfT), ".shr_exact(4), Some(0x1));")] + #[doc = concat!("assert_eq!(0x10", stringify!($SelfT), ".shr_exact(5), None);")] /// ``` #[unstable(feature = "exact_bitshifts", issue = "144336")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const fn exact_shr(self, rhs: u32) -> Option<$SelfT> { + pub const fn shr_exact(self, rhs: u32) -> Option<$SelfT> { if rhs <= self.trailing_zeros() && rhs < <$SelfT>::BITS { // SAFETY: rhs is checked above Some(unsafe { self.unchecked_shr(rhs) }) @@ -1636,16 +1636,16 @@ macro_rules! int_impl { /// This results in undefined behavior when `rhs > self.trailing_zeros() || rhs >= #[doc = concat!(stringify!($SelfT), "::BITS`")] /// i.e. when - #[doc = concat!("[`", stringify!($SelfT), "::exact_shr`]")] + #[doc = concat!("[`", stringify!($SelfT), "::shr_exact`]")] /// would return `None`. #[unstable(feature = "exact_bitshifts", issue = "144336")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const unsafe fn unchecked_exact_shr(self, rhs: u32) -> $SelfT { + pub const unsafe fn unchecked_shr_exact(self, rhs: u32) -> $SelfT { assert_unsafe_precondition!( check_library_ub, - concat!(stringify!($SelfT), "::unchecked_exact_shr cannot shift out non-zero bits"), + concat!(stringify!($SelfT), "::unchecked_shr_exact cannot shift out non-zero bits"), ( zeros: u32 = self.trailing_zeros(), bits: u32 = <$SelfT>::BITS, diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index 08162abb31153..6ee1d8eec7192 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -1830,14 +1830,14 @@ macro_rules! uint_impl { /// ``` /// #![feature(exact_bitshifts)] /// - #[doc = concat!("assert_eq!(0x1", stringify!($SelfT), ".exact_shl(4), Some(0x10));")] - #[doc = concat!("assert_eq!(0x1", stringify!($SelfT), ".exact_shl(129), None);")] + #[doc = concat!("assert_eq!(0x1", stringify!($SelfT), ".shl_exact(4), Some(0x10));")] + #[doc = concat!("assert_eq!(0x1", stringify!($SelfT), ".shl_exact(129), None);")] /// ``` #[unstable(feature = "exact_bitshifts", issue = "144336")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const fn exact_shl(self, rhs: u32) -> Option<$SelfT> { + pub const fn shl_exact(self, rhs: u32) -> Option<$SelfT> { if rhs <= self.leading_zeros() && rhs < <$SelfT>::BITS { // SAFETY: rhs is checked above Some(unsafe { self.unchecked_shl(rhs) }) @@ -1855,16 +1855,16 @@ macro_rules! uint_impl { /// This results in undefined behavior when `rhs > self.leading_zeros() || rhs >= #[doc = concat!(stringify!($SelfT), "::BITS`")] /// i.e. when - #[doc = concat!("[`", stringify!($SelfT), "::exact_shl`]")] + #[doc = concat!("[`", stringify!($SelfT), "::shl_exact`]")] /// would return `None`. 
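As a hedged aside on the unsigned `shl_exact` documented above: a minimal standalone sketch of the check it performs, using a hypothetical `u32` helper rather than the generated macro code.

// Sketch only: the shift succeeds exactly when it is in range and would not
// discard a set bit; `shl_exact_u32` is a hypothetical name, not the method.
fn shl_exact_u32(x: u32, rhs: u32) -> Option<u32> {
    if rhs <= x.leading_zeros() && rhs < u32::BITS { Some(x << rhs) } else { None }
}

fn main() {
    assert_eq!(shl_exact_u32(0x1, 4), Some(0x10)); // mirrors the doctest above
    assert_eq!(shl_exact_u32(u32::MAX, 1), None); // would shift out a set bit
}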
#[unstable(feature = "exact_bitshifts", issue = "144336")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const unsafe fn unchecked_exact_shl(self, rhs: u32) -> $SelfT { + pub const unsafe fn unchecked_shl_exact(self, rhs: u32) -> $SelfT { assert_unsafe_precondition!( check_library_ub, - concat!(stringify!($SelfT), "::exact_shl_unchecked cannot shift out non-zero bits"), + concat!(stringify!($SelfT), "::unchecked_shl_exact cannot shift out non-zero bits"), ( zeros: u32 = self.leading_zeros(), bits: u32 = <$SelfT>::BITS, @@ -2002,14 +2002,14 @@ macro_rules! uint_impl { /// ``` /// #![feature(exact_bitshifts)] /// - #[doc = concat!("assert_eq!(0x10", stringify!($SelfT), ".exact_shr(4), Some(0x1));")] - #[doc = concat!("assert_eq!(0x10", stringify!($SelfT), ".exact_shr(5), None);")] + #[doc = concat!("assert_eq!(0x10", stringify!($SelfT), ".shr_exact(4), Some(0x1));")] + #[doc = concat!("assert_eq!(0x10", stringify!($SelfT), ".shr_exact(5), None);")] /// ``` #[unstable(feature = "exact_bitshifts", issue = "144336")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const fn exact_shr(self, rhs: u32) -> Option<$SelfT> { + pub const fn shr_exact(self, rhs: u32) -> Option<$SelfT> { if rhs <= self.trailing_zeros() && rhs < <$SelfT>::BITS { // SAFETY: rhs is checked above Some(unsafe { self.unchecked_shr(rhs) }) @@ -2027,16 +2027,16 @@ macro_rules! uint_impl { /// This results in undefined behavior when `rhs > self.trailing_zeros() || rhs >= #[doc = concat!(stringify!($SelfT), "::BITS`")] /// i.e. when - #[doc = concat!("[`", stringify!($SelfT), "::exact_shr`]")] + #[doc = concat!("[`", stringify!($SelfT), "::shr_exact`]")] /// would return `None`. #[unstable(feature = "exact_bitshifts", issue = "144336")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] - pub const unsafe fn unchecked_exact_shr(self, rhs: u32) -> $SelfT { + pub const unsafe fn unchecked_shr_exact(self, rhs: u32) -> $SelfT { assert_unsafe_precondition!( check_library_ub, - concat!(stringify!($SelfT), "::exact_shr_unchecked cannot shift out non-zero bits"), + concat!(stringify!($SelfT), "::unchecked_shr_exact cannot shift out non-zero bits"), ( zeros: u32 = self.trailing_zeros(), bits: u32 = <$SelfT>::BITS, From 54a5e0f29056b6f9e5f425829c8ab66c0724db2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Sun, 2 Nov 2025 14:44:51 +0100 Subject: [PATCH 260/358] Prepare for merging from rust-lang/rust This updates the rust-version file to 73e6c9ebd9123154a196300ef58e30ec8928e74e. --- stdarch/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdarch/rust-version b/stdarch/rust-version index 1ced6098acf4b..e313eada45431 100644 --- a/stdarch/rust-version +++ b/stdarch/rust-version @@ -1 +1 @@ -32e7a4b92b109c24e9822c862a7c74436b50e564 +73e6c9ebd9123154a196300ef58e30ec8928e74e From fd7805fcb25594a6af3eaf9653bd11a5a6255122 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 2 Nov 2025 15:40:56 +0100 Subject: [PATCH 261/358] remove `unsafe` from `_mm_pause` uses --- core/src/hint.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/hint.rs b/core/src/hint.rs index 6efe95a9edce9..71acede7e3eb3 100644 --- a/core/src/hint.rs +++ b/core/src/hint.rs @@ -271,11 +271,11 @@ pub fn spin_loop() { crate::cfg_select! 
{
         target_arch = "x86" => {
             // SAFETY: the `cfg` attr ensures that we only execute this on x86 targets.
-            unsafe { crate::arch::x86::_mm_pause() }
+            crate::arch::x86::_mm_pause()
         }
         target_arch = "x86_64" => {
             // SAFETY: the `cfg` attr ensures that we only execute this on x86_64 targets.
-            unsafe { crate::arch::x86_64::_mm_pause() }
+            crate::arch::x86_64::_mm_pause()
         }
         target_arch = "riscv32" => crate::arch::riscv32::pause(),
         target_arch = "riscv64" => crate::arch::riscv64::pause(),

From 10b1cce19fbbeac15120a51ad2725141c21a2016 Mon Sep 17 00:00:00 2001
From: Brad Smith
Date: Wed, 24 Sep 2025 20:53:11 -0400
Subject: [PATCH 262/358] std_detect: Support run-time detection on OpenBSD
 using elf_aux_info

---
 std_detect/README.md                        |  4 +-
 std_detect/src/detect/mod.rs                |  5 +-
 std_detect/src/detect/os/openbsd/auxvec.rs  | 54 +++++++++++++++++++++
 std_detect/src/detect/os/openbsd/mod.rs     | 21 ++++++++
 std_detect/src/detect/os/openbsd/powerpc.rs | 21 ++++++++
 std_detect/tests/cpu-detection.rs           |  7 ++-
 6 files changed, 107 insertions(+), 5 deletions(-)
 create mode 100644 std_detect/src/detect/os/openbsd/auxvec.rs
 create mode 100644 std_detect/src/detect/os/openbsd/mod.rs
 create mode 100644 std_detect/src/detect/os/openbsd/powerpc.rs

diff --git a/std_detect/README.md b/std_detect/README.md
index edc90d319a1da..177848dec1044 100644
--- a/std_detect/README.md
+++ b/std_detect/README.md
@@ -66,10 +66,12 @@ crate from working on applications in which `std` is not available.
 * FreeBSD:
   * `arm32`, `powerpc64`: `std_detect` supports these on FreeBSD by querying ELF
-    auxiliary vectors using `sysctl`.
+    auxiliary vectors using `elf_aux_info`.
   * `arm64`: run-time feature detection is implemented by directly querying `mrs`.
 * OpenBSD:
+  * `powerpc64`: `std_detect` supports this on OpenBSD by querying ELF auxiliary
+    vectors using `elf_aux_info`.
   * `arm64`: run-time feature detection is implemented by querying `sysctl`.
 * Windows:
diff --git a/std_detect/src/detect/mod.rs b/std_detect/src/detect/mod.rs
index 2bc6e9a24db9a..ae6fb2ab37279 100644
--- a/std_detect/src/detect/mod.rs
+++ b/std_detect/src/detect/mod.rs
@@ -61,11 +61,12 @@ cfg_select! {
         #[path = "os/freebsd/mod.rs"]
         mod os;
     }
-    all(target_os = "openbsd", target_arch = "aarch64", feature = "libc") => {
+    all(target_os = "openbsd", feature = "libc") => {
         #[allow(dead_code)] // we don't use code that calls the mrs instruction.
+        #[cfg(target_arch = "aarch64")]
         #[path = "os/aarch64.rs"]
         mod aarch64;
-        #[path = "os/openbsd/aarch64.rs"]
+        #[path = "os/openbsd/mod.rs"]
         mod os;
     }
     all(target_os = "windows", any(target_arch = "aarch64", target_arch = "arm64ec")) => {
diff --git a/std_detect/src/detect/os/openbsd/auxvec.rs b/std_detect/src/detect/os/openbsd/auxvec.rs
new file mode 100644
index 0000000000000..7a1efb2265d4c
--- /dev/null
+++ b/std_detect/src/detect/os/openbsd/auxvec.rs
@@ -0,0 +1,54 @@
+//! Parses ELF auxiliary vectors.
+#![cfg_attr(
+    any(target_arch = "aarch64", target_arch = "powerpc64", target_arch = "riscv64"),
+    allow(dead_code)
+)]
+
+/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
+///
+/// If an entry cannot be read all the bits in the bitfield are set to zero.
+/// This should be interpreted as all the features being disabled.
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct AuxVec {
+    pub hwcap: usize,
+    pub hwcap2: usize,
+}
+
+/// ELF Auxiliary Vector
+///
+/// The auxiliary vector is a memory region in a running ELF program's stack
+/// composed of (key: usize, value: usize) pairs.
+///
+/// The keys used in the aux vector are platform dependent. For OpenBSD, they are
+/// defined in [machine/elf.h][elf_h_arm64] ([powerpc64 variant][elf_h_powerpc64]). The hardware
+/// capabilities of a given CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys.
+///
+/// Note that run-time feature detection is not invoked for features that can
+/// be detected at compile-time.
+///
+/// [elf_h_arm64]: https://github.com/openbsd/src/blob/master/sys/arch/arm64/include/elf.h
+/// [elf_h_powerpc64]: https://github.com/openbsd/src/blob/master/sys/arch/powerpc64/include/elf.h
+pub(crate) fn auxv() -> Result<AuxVec, ()> {
+    let hwcap = archauxv(libc::AT_HWCAP);
+    let hwcap2 = archauxv(libc::AT_HWCAP2);
+    // Zero could indicate that no features were detected, but it's also used to
+    // indicate an error. In particular, on many platforms AT_HWCAP2 will be
+    // legitimately zero, since it contains the most recent feature flags.
+    if hwcap != 0 || hwcap2 != 0 {
+        return Ok(AuxVec { hwcap, hwcap2 });
+    }
+    Err(())
+}
+
+/// Tries to read the `key` from the auxiliary vector.
+fn archauxv(key: libc::c_int) -> usize {
+    const OUT_LEN: libc::c_int = core::mem::size_of::<libc::c_ulong>() as libc::c_int;
+    let mut out: libc::c_ulong = 0;
+    unsafe {
+        let res =
+            libc::elf_aux_info(key, &mut out as *mut libc::c_ulong as *mut libc::c_void, OUT_LEN);
+        // If elf_aux_info fails, `out` will be left at zero (which is the proper default value).
+        debug_assert!(res == 0 || out == 0);
+    }
+    out as usize
+}
diff --git a/std_detect/src/detect/os/openbsd/mod.rs b/std_detect/src/detect/os/openbsd/mod.rs
new file mode 100644
index 0000000000000..ebfdbd5e6bcfc
--- /dev/null
+++ b/std_detect/src/detect/os/openbsd/mod.rs
@@ -0,0 +1,21 @@
+//! Run-time feature detection on OpenBSD
+
+mod auxvec;
+
+cfg_select! {
+    target_arch = "aarch64" => {
+        mod aarch64;
+        pub(crate) use self::aarch64::detect_features;
+    }
+    target_arch = "powerpc64" => {
+        mod powerpc;
+        pub(crate) use self::powerpc::detect_features;
+    }
+    _ => {
+        use crate::detect::cache;
+        /// Performs run-time feature detection.
+        pub(crate) fn detect_features() -> cache::Initializer {
+            cache::Initializer::default()
+        }
+    }
+}
diff --git a/std_detect/src/detect/os/openbsd/powerpc.rs b/std_detect/src/detect/os/openbsd/powerpc.rs
new file mode 100644
index 0000000000000..dd98ab2a3f76e
--- /dev/null
+++ b/std_detect/src/detect/os/openbsd/powerpc.rs
@@ -0,0 +1,21 @@
+//! Run-time feature detection for PowerPC on OpenBSD.
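As a hedged aside before the new `powerpc.rs` below: a standalone sketch of the HWCAP decoding pattern it relies on. The mask value mirrors the altivec bit used in the patch, but treat it as an assumption here; the authoritative values live in OpenBSD's machine/elf.h headers.

// Sketch only: each CPU feature is a single bit in the AT_HWCAP/AT_HWCAP2
// words read via elf_aux_info; `PPC_FEATURE_HAS_ALTIVEC` is an assumed name.
const PPC_FEATURE_HAS_ALTIVEC: usize = 0x1000_0000;

fn has_altivec(hwcap: usize) -> bool {
    // A feature is reported as available when its bit is set.
    hwcap & PPC_FEATURE_HAS_ALTIVEC != 0
}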
+
+use super::auxvec;
+use crate::detect::{Feature, cache};
+
+pub(crate) fn detect_features() -> cache::Initializer {
+    let mut value = cache::Initializer::default();
+    let enable_feature = |value: &mut cache::Initializer, f, enable| {
+        if enable {
+            value.set(f as u32);
+        }
+    };
+
+    if let Ok(auxv) = auxvec::auxv() {
+        enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0);
+        enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0);
+        enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0);
+        return value;
+    }
+    value
+}
diff --git a/std_detect/tests/cpu-detection.rs b/std_detect/tests/cpu-detection.rs
index 0c4fa57f2b465..f46a914aae159 100644
--- a/std_detect/tests/cpu-detection.rs
+++ b/std_detect/tests/cpu-detection.rs
@@ -320,8 +320,11 @@ fn powerpc_linux() {
 }
 
 #[test]
-#[cfg(all(target_arch = "powerpc64", any(target_os = "linux", target_os = "freebsd"),))]
-fn powerpc64_linux_or_freebsd() {
+#[cfg(all(
+    target_arch = "powerpc64",
+    any(target_os = "linux", target_os = "freebsd", target_os = "openbsd"),
+))]
+fn powerpc64_linux_or_bsd() {
     println!("altivec: {}", is_powerpc64_feature_detected!("altivec"));
     println!("vsx: {}", is_powerpc64_feature_detected!("vsx"));
     println!("power8: {}", is_powerpc64_feature_detected!("power8"));

From bf20594708a984df621b9d1206c1b28364d6420a Mon Sep 17 00:00:00 2001
From: Antoni Spaanderman <56turtle56@gmail.com>
Date: Mon, 3 Nov 2025 12:35:30 +0100
Subject: [PATCH 263/358] add specialization for extend_front and prepend with
 copied slice iterator

---
 alloc/src/collections/vec_deque/mod.rs        | 29 ++++++++
 .../src/collections/vec_deque/spec_extend.rs  | 73 +++++++++++--------
 alloc/src/lib.rs                              |  1 +
 alloctests/lib.rs                             |  2 +
 alloctests/tests/vec_deque.rs                 | 41 +++++++++--
 core/src/iter/adapters/copied.rs              |  6 ++
 6 files changed, 118 insertions(+), 34 deletions(-)

diff --git a/alloc/src/collections/vec_deque/mod.rs b/alloc/src/collections/vec_deque/mod.rs
index fc0220022dbb5..a85c874d54de1 100644
--- a/alloc/src/collections/vec_deque/mod.rs
+++ b/alloc/src/collections/vec_deque/mod.rs
@@ -520,6 +520,35 @@ impl<T, A: Allocator> VecDeque<T, A> {
         }
     }
 
+    /// Copies all values from `src` to `dst` in reversed order, wrapping around if needed.
+    /// Assumes capacity is sufficient.
+    /// Equivalent to calling [`VecDeque::copy_slice`] with a [reversed](https://doc.rust-lang.org/std/primitive.slice.html#method.reverse) slice.
+    #[inline]
+    unsafe fn copy_slice_reversed(&mut self, dst: usize, src: &[T]) {
+        /// # Safety
+        ///
+        /// See [`ptr::copy_nonoverlapping`].
+        unsafe fn copy_nonoverlapping_reversed<T>(src: *const T, dst: *mut T, count: usize) {
+            for i in 0..count {
+                unsafe { ptr::copy_nonoverlapping(src.add(count - 1 - i), dst.add(i), 1) };
+            }
+        }
+
+        debug_assert!(src.len() <= self.capacity());
+        let head_room = self.capacity() - dst;
+        if src.len() <= head_room {
+            unsafe {
+                copy_nonoverlapping_reversed(src.as_ptr(), self.ptr().add(dst), src.len());
+            }
+        } else {
+            let (left, right) = src.split_at(src.len() - head_room);
+            unsafe {
+                copy_nonoverlapping_reversed(right.as_ptr(), self.ptr().add(dst), right.len());
+                copy_nonoverlapping_reversed(left.as_ptr(), self.ptr(), left.len());
+            }
+        }
+    }
+
     /// Writes all values from `iter` to `dst`.
     ///
     /// # Safety
diff --git a/alloc/src/collections/vec_deque/spec_extend.rs b/alloc/src/collections/vec_deque/spec_extend.rs
index 3e830d2afe676..f73ba795cbea4 100644
--- a/alloc/src/collections/vec_deque/spec_extend.rs
+++ b/alloc/src/collections/vec_deque/spec_extend.rs
@@ -1,6 +1,4 @@
-#[cfg(not(test))]
-use core::iter::Rev;
-use core::iter::TrustedLen;
+use core::iter::{Copied, Rev, TrustedLen};
 use core::slice;
 
 use super::VecDeque;
@@ -158,9 +156,9 @@ impl<T, A: Allocator> SpecExtendFront<vec::IntoIter<T>> for VecDeque<T, A> {
     #[track_caller]
     fn spec_extend_front(&mut self, mut iterator: vec::IntoIter<T>) {
-        let slice = iterator.as_mut_slice();
-        slice.reverse();
-        unsafe { prepend(self, slice) };
+        let slice = iterator.as_slice();
+        // SAFETY: elements in the slice are forgotten after this call
+        unsafe { prepend_reversed(self, slice) };
         iterator.forget_remaining_elements();
     }
 }
@@ -170,36 +168,40 @@ impl<T, A: Allocator> SpecExtendFront<Rev<vec::IntoIter<T>>> for VecDeque<T, A>
     fn spec_extend_front(&mut self, iterator: Rev<vec::IntoIter<T>>) {
         let mut iterator = iterator.into_inner();
-        unsafe { prepend(self, iterator.as_slice()) };
+        let slice = iterator.as_slice();
+        // SAFETY: elements in the slice are forgotten after this call
+        unsafe { prepend(self, slice) };
         iterator.forget_remaining_elements();
     }
 }
 
-// impl<T, A: Allocator> SpecExtendFront<Copied<slice::Iter<'_, T>>> for VecDeque<T, A>
-// where
-//     T: Copy,
-// {
-//     #[track_caller]
-//     fn spec_extend_front(&mut self, _iter: Copied<slice::Iter<'_, T>>) {
-//         // unsafe { prepend(self, slice) };
-//         // reverse in place?
-//     }
-// }
-
-// impl<T, A: Allocator> SpecExtendFront<Rev<Copied<slice::Iter<'_, T>>>> for VecDeque<T, A>
-// where
-//     T: Copy,
-// {
-//     #[track_caller]
-//     fn spec_extend_front(&mut self, iter: Rev<Copied<slice::Iter<'_, T>>>) {
-//         unsafe { prepend(self, iter.into_inner().it.as_slice()) };
-//     }
-// }
+impl<'a, T, A: Allocator> SpecExtendFront<Copied<slice::Iter<'a, T>>> for VecDeque<T, A>
+where
+    Copied<slice::Iter<'a, T>>: Iterator<Item = T>,
+{
+    #[track_caller]
+    fn spec_extend_front(&mut self, iter: Copied<slice::Iter<'a, T>>) {
+        let slice = iter.into_inner().as_slice();
+        // SAFETY: T is Copy because Copied<slice::Iter<'a, T>> is Iterator<Item = T>
+        unsafe { prepend_reversed(self, slice) };
+    }
+}
+
+impl<'a, T, A: Allocator> SpecExtendFront<Rev<Copied<slice::Iter<'a, T>>>> for VecDeque<T, A>
+where
+    Rev<Copied<slice::Iter<'a, T>>>: Iterator<Item = T>,
+{
+    #[track_caller]
+    fn spec_extend_front(&mut self, iter: Rev<Copied<slice::Iter<'a, T>>>) {
+        let slice = iter.into_inner().into_inner().as_slice();
+        // SAFETY: T is Copy because Rev<Copied<slice::Iter<'a, T>>> is Iterator<Item = T>
+        unsafe { prepend(self, slice) };
+    }
+}
 
 /// # Safety
 ///
-/// `slice` will be copied into the deque, make sure to forget the items if `T` is not `Copy`.
-#[cfg(not(test))]
+/// Elements of `slice` will be copied into the deque, make sure to forget the items if `T` is not `Copy`.
 unsafe fn prepend<T, A: Allocator>(deque: &mut VecDeque<T, A>, slice: &[T]) {
     deque.reserve(slice.len());
 
@@ -209,3 +211,16 @@ unsafe fn prepend<T, A: Allocator>(deque: &mut VecDeque<T, A>, slice: &[T]) {
         deque.len += slice.len();
     }
 }
+
+/// # Safety
+///
+/// Elements of `slice` will be copied into the deque, make sure to forget the items if `T` is not `Copy`.
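As a hedged aside on the ordering contract behind `prepend` and `prepend_reversed`: a standalone illustration built only on stable `VecDeque` APIs, matching the tests later in this patch. `prepend` keeps the source order, while `extend_front` observably behaves like repeated `push_front` and therefore reverses it.

// Sketch only: emulates what `extend_front` amounts to observably.
use std::collections::VecDeque;

fn demo() {
    let mut v = VecDeque::from([9]);
    for x in [1, 2, 3] {
        v.push_front(x); // front-extension reverses the source order
    }
    assert_eq!(v, [3, 2, 1, 9]);
}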
+unsafe fn prepend_reversed<T, A: Allocator>(deque: &mut VecDeque<T, A>, slice: &[T]) {
+    deque.reserve(slice.len());
+
+    unsafe {
+        deque.head = deque.wrap_sub(deque.head, slice.len());
+        deque.copy_slice_reversed(deque.head, slice);
+        deque.len += slice.len();
+    }
+}
diff --git a/alloc/src/lib.rs b/alloc/src/lib.rs
index 81feb1cfc3f27..73197d021f1a3 100644
--- a/alloc/src/lib.rs
+++ b/alloc/src/lib.rs
@@ -106,6 +106,7 @@
 #![feature(const_default)]
 #![feature(const_eval_select)]
 #![feature(const_heap)]
+#![feature(copied_into_inner)]
 #![feature(core_intrinsics)]
 #![feature(deprecated_suggestion)]
 #![feature(deref_pure_trait)]
diff --git a/alloctests/lib.rs b/alloctests/lib.rs
index 0201c8752210c..efdcb893bfeef 100644
--- a/alloctests/lib.rs
+++ b/alloctests/lib.rs
@@ -20,6 +20,7 @@
 #![feature(assert_matches)]
 #![feature(char_internals)]
 #![feature(char_max_len)]
+#![feature(copied_into_inner)]
 #![feature(core_intrinsics)]
 #![feature(exact_size_is_empty)]
 #![feature(extend_one)]
@@ -32,6 +33,7 @@
 #![feature(maybe_uninit_uninit_array_transpose)]
 #![feature(ptr_alignment_type)]
 #![feature(ptr_internals)]
+#![feature(rev_into_inner)]
 #![feature(sized_type_properties)]
 #![feature(slice_iter_mut_as_mut_slice)]
 #![feature(slice_ptr_get)]
diff --git a/alloctests/tests/vec_deque.rs b/alloctests/tests/vec_deque.rs
index 6442ee536fb57..cf31613577f74 100644
--- a/alloctests/tests/vec_deque.rs
+++ b/alloctests/tests/vec_deque.rs
@@ -2113,14 +2113,45 @@ fn test_extend_front() {
 }
 
 #[test]
-fn test_extend_front_specialization() {
+fn test_extend_front_specialization_vec_into_iter() {
+    // trigger 4 code paths: all combinations of prepend and extend_front, wrap and no wrap
     let mut v = VecDeque::with_capacity(4);
     v.prepend(vec![1, 2, 3]);
     assert_eq!(v, [1, 2, 3]);
-    v.pop_front();
-    v.prepend((-4..2).collect::<Vec<_>>());
-    assert_eq!(v, (-4..=3).collect::<Vec<_>>());
-    v.clear();
+    v.pop_back();
+    // this should wrap around the physical buffer
+    v.prepend(vec![-1, 0]);
+    // check it really wrapped
+    assert_eq!(v.as_slices(), ([-1].as_slice(), [0, 1, 2].as_slice()));
+
+    let mut v = VecDeque::with_capacity(4);
     v.extend_front(vec![1, 2, 3]);
     assert_eq!(v, [3, 2, 1]);
+    v.pop_back();
+    // this should wrap around the physical buffer
+    v.extend_front(vec![4, 5]);
+    // check it really wrapped
+    assert_eq!(v.as_slices(), ([5].as_slice(), [4, 3, 2].as_slice()));
+}
+
+#[test]
+fn test_extend_front_specialization_copy_slice() {
+    // trigger 4 code paths: all combinations of prepend and extend_front, wrap and no wrap
+    let mut v = VecDeque::with_capacity(4);
+    v.prepend([1, 2, 3].as_slice().iter().copied());
+    assert_eq!(v, [1, 2, 3]);
+    v.pop_back();
+    // this should wrap around the physical buffer
+    v.prepend([-1, 0].as_slice().iter().copied());
+    // check it really wrapped
+    assert_eq!(v.as_slices(), ([-1].as_slice(), [0, 1, 2].as_slice()));
+
+    let mut v = VecDeque::with_capacity(4);
+    v.extend_front([1, 2, 3].as_slice().iter().copied());
+    assert_eq!(v, [3, 2, 1]);
+    v.pop_back();
+    // this should wrap around the physical buffer
+    v.extend_front([4, 5].as_slice().iter().copied());
+    // check it really wrapped
+    assert_eq!(v.as_slices(), ([5].as_slice(), [4, 3, 2].as_slice()));
 }
diff --git a/core/src/iter/adapters/copied.rs b/core/src/iter/adapters/copied.rs
index 23e4e25ab5388..9627ace29795c 100644
--- a/core/src/iter/adapters/copied.rs
+++ b/core/src/iter/adapters/copied.rs
@@ -24,6 +24,12 @@ impl<I> Copied<I> {
     pub(in crate::iter) fn new(it: I) -> Copied<I> {
         Copied { it }
     }
+
+    #[doc(hidden)]
+    #[unstable(feature = "copied_into_inner", issue = "none")]
+    pub fn into_inner(self) -> I {
+        self.it
+    }
 }
 
 fn copy_fold<T: Copy, Acc>(mut f: impl FnMut(Acc, T) -> Acc) -> impl FnMut(Acc, &T) -> Acc {

From f160ad94beeccf35ca07fecae71ccd40cf32530f Mon Sep 17 00:00:00 2001
From: Jieyou Xu
Date: Mon, 3 Nov 2025 19:39:50 +0800
Subject: [PATCH 264/358] Revert "unicode_data refactors RUST-147622"

This PR reverts RUST-147622 for several reasons:

1. The RUST-147622 PR would format the generated core library code using an
   arbitrary `rustfmt` picked up from `PATH`, which causes hard-to-debug
   failures when the `rustfmt` used to format the generated unicode data code
   differs from the `rustfmt` used to format the in-tree library code.
2. Previously, the `unicode-table-generator` tests were not run in CI as part
   of `coretests`; since we run the library `coretests` under `miri` in the
   `x86_64-gnu-aux` job, the newly generated tests caused an unacceptably
   large Merge CI time regression, from ~2 hours to ~3.5 hours, making it the
   slowest Merge CI job (and thus the new bottleneck).
3. RUST-147622 also had the unintended effect of causing a diagnostic
   regression (RUST-148387), though that's mostly an edge case not properly
   handled by `rustc` diagnostics.

Given that these are three distinct causes with non-trivial fixes, I'm
proposing to revert this PR to return us to baseline. This is without
prejudice to relanding the changes once these issues are addressed; the goal
is to alleviate the time pressure of fixing these non-trivial issues.

---
 core/src/unicode/mod.rs              |    3 +-
 core/src/unicode/rt.rs               |  162 --
 core/src/unicode/unicode_data.rs     | 2537 +++++++++++-----------
 coretests/tests/lib.rs               |    1 -
 coretests/tests/unicode.rs           |   96 -
 coretests/tests/unicode/test_data.rs | 2928 --------------------------
 6 files changed, 1216 insertions(+), 4511 deletions(-)
 delete mode 100644 core/src/unicode/rt.rs
 delete mode 100644 coretests/tests/unicode/test_data.rs

diff --git a/core/src/unicode/mod.rs b/core/src/unicode/mod.rs
index 1ee97d64d01bc..c71fa754e68fb 100644
--- a/core/src/unicode/mod.rs
+++ b/core/src/unicode/mod.rs
@@ -18,9 +18,8 @@ pub(crate) use unicode_data::white_space::lookup as White_Space;
 
 pub(crate) mod printable;
-mod rt;
 #[allow(unreachable_pub)]
-pub mod unicode_data;
+mod unicode_data;
 
 /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
 /// `char` and `str` methods are based on.
diff --git a/core/src/unicode/rt.rs b/core/src/unicode/rt.rs
deleted file mode 100644
index c438635cd794e..0000000000000
--- a/core/src/unicode/rt.rs
+++ /dev/null
@@ -1,162 +0,0 @@
-//! Runtime support for `unicode_data`.
-
-#[inline(always)]
-pub(super) const fn bitset_search<
-    const N: usize,
-    const CHUNK_SIZE: usize,
-    const N1: usize,
-    const CANONICAL: usize,
-    const CANONICALIZED: usize,
->(
-    needle: u32,
-    chunk_idx_map: &[u8; N],
-    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
-    bitset_canonical: &[u64; CANONICAL],
-    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
-) -> bool {
-    let bucket_idx = (needle / 64) as usize;
-    let chunk_map_idx = bucket_idx / CHUNK_SIZE;
-    let chunk_piece = bucket_idx % CHUNK_SIZE;
-    // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const.
-    let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
-        chunk_idx_map[chunk_map_idx]
-    } else {
-        return false;
-    };
-    let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
-    // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const.
-    let word = if idx < bitset_canonical.len() {
-        bitset_canonical[idx]
-    } else {
-        let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
-        let mut word = bitset_canonical[real_idx as usize];
-        let should_invert = mapping & (1 << 6) != 0;
-        if should_invert {
-            word = !word;
-        }
-        // Lower 6 bits
-        let quantity = mapping & ((1 << 6) - 1);
-        if mapping & (1 << 7) != 0 {
-            // shift
-            word >>= quantity as u64;
-        } else {
-            word = word.rotate_left(quantity as u32);
-        }
-        word
-    };
-    (word & (1 << (needle % 64) as u64)) != 0
-}
-
-#[repr(transparent)]
-pub(super) struct ShortOffsetRunHeader(pub(super) u32);
-
-impl ShortOffsetRunHeader {
-    pub(super) const fn new(start_index: usize, prefix_sum: u32) -> Self {
-        assert!(start_index < (1 << 11));
-        assert!(prefix_sum < (1 << 21));
-
-        Self((start_index as u32) << 21 | prefix_sum)
-    }
-
-    #[inline]
-    pub(super) const fn start_index(&self) -> usize {
-        (self.0 >> 21) as usize
-    }
-
-    #[inline]
-    pub(super) const fn prefix_sum(&self) -> u32 {
-        self.0 & ((1 << 21) - 1)
-    }
-}
-
-/// # Safety
-///
-/// - The last element of `short_offset_runs` must be greater than `std::char::MAX`.
-/// - The start indices of all elements in `short_offset_runs` must be less than `OFFSETS`.
-#[inline(always)]
-pub(super) unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
-    needle: char,
-    short_offset_runs: &[ShortOffsetRunHeader; SOR],
-    offsets: &[u8; OFFSETS],
-) -> bool {
-    let needle = needle as u32;
-
-    let last_idx =
-        match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header.0 << 11) {
-            Ok(idx) => idx + 1,
-            Err(idx) => idx,
-        };
-    // SAFETY: `last_idx` *cannot* be past the end of the array, as the last
-    // element is greater than `std::char::MAX` (the largest possible needle)
-    // as guaranteed by the caller.
-    //
-    // So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
-    // correct location cannot be past it, so `Err(idx) => idx != length` either.
-    //
-    // This means that we can avoid bounds checking for the accesses below, too.
-    //
-    // We need to use `intrinsics::assume` since the `panic_nounwind` contained
-    // in `hint::assert_unchecked` may not be optimized out.
-    unsafe { crate::intrinsics::assume(last_idx < SOR) };
-
-    let mut offset_idx = short_offset_runs[last_idx].start_index();
-    let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
-        (*next).start_index() - offset_idx
-    } else {
-        offsets.len() - offset_idx
-    };
-
-    let prev =
-        last_idx.checked_sub(1).map(|prev| short_offset_runs[prev].prefix_sum()).unwrap_or(0);
-
-    let total = needle - prev;
-    let mut prefix_sum = 0;
-    for _ in 0..(length - 1) {
-        // SAFETY: It is guaranteed that `length <= OFFSETS - offset_idx`,
-        // so it follows that `length - 1 + offset_idx < OFFSETS`, therefore
-        // `offset_idx < OFFSETS` is always true in this loop.
-        //
-        // We need to use `intrinsics::assume` since the `panic_nounwind` contained
-        // in `hint::assert_unchecked` may not be optimized out.
-        unsafe { crate::intrinsics::assume(offset_idx < OFFSETS) };
-        let offset = offsets[offset_idx];
-        prefix_sum += offset as u32;
-        if prefix_sum > total {
-            break;
-        }
-        offset_idx += 1;
-    }
-    offset_idx % 2 == 1
-}
-
-/// # Safety
-/// The second component of each tuple in `table` must either be:
-/// - A valid `char`
-/// - A value with the high bit (1 << 22) set, and the lower 22 bits
-///   being a valid index into `multi`.
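As a hedged aside on the `ShortOffsetRunHeader` layout that this revert deletes from `rt.rs` above and restores into `unicode_data.rs` below: a standalone sketch of the bit packing, directly mirroring the assertions in `new`.

// Sketch only: an 11-bit start index packed above a 21-bit prefix sum in one u32.
fn pack(start_index: u32, prefix_sum: u32) -> u32 {
    assert!(start_index < (1 << 11) && prefix_sum < (1 << 21));
    (start_index << 21) | prefix_sum
}

fn unpack(header: u32) -> (u32, u32) {
    (header >> 21, header & ((1 << 21) - 1)) // (start_index, prefix_sum)
}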
-#[inline(always)] -pub(super) unsafe fn case_conversion( - c: char, - ascii_fn: fn(char) -> char, - table: &[(char, u32)], - multi: &[[char; 3]], -) -> [char; 3] { - const INDEX_MASK: u32 = 1 << 22; - - if c.is_ascii() { - return [ascii_fn(c), '\0', '\0']; - } - - let Ok(i) = table.binary_search_by(|&(key, _)| key.cmp(&c)) else { - return [c, '\0', '\0']; - }; - - let u = table[i].1; - match char::from_u32(u) { - Option::Some(c) => [c, '\0', '\0'], - Option::None => { - // SAFETY: Index comes from statically generated table - unsafe { *multi.get_unchecked((u & (INDEX_MASK - 1)) as usize) } - } - } -} diff --git a/core/src/unicode/unicode_data.rs b/core/src/unicode/unicode_data.rs index 81d0484310cf1..3c38b44224f87 100644 --- a/core/src/unicode/unicode_data.rs +++ b/core/src/unicode/unicode_data.rs @@ -11,64 +11,167 @@ // to_upper : 13656 bytes // Total : 31911 bytes -use super::rt::*; +#[inline(always)] +const fn bitset_search< + const N: usize, + const CHUNK_SIZE: usize, + const N1: usize, + const CANONICAL: usize, + const CANONICALIZED: usize, +>( + needle: u32, + chunk_idx_map: &[u8; N], + bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], + bitset_canonical: &[u64; CANONICAL], + bitset_canonicalized: &[(u8, u8); CANONICALIZED], +) -> bool { + let bucket_idx = (needle / 64) as usize; + let chunk_map_idx = bucket_idx / CHUNK_SIZE; + let chunk_piece = bucket_idx % CHUNK_SIZE; + // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const. + let chunk_idx = if chunk_map_idx < chunk_idx_map.len() { + chunk_idx_map[chunk_map_idx] + } else { + return false; + }; + let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize; + // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const. + let word = if idx < bitset_canonical.len() { + bitset_canonical[idx] + } else { + let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()]; + let mut word = bitset_canonical[real_idx as usize]; + let should_invert = mapping & (1 << 6) != 0; + if should_invert { + word = !word; + } + // Lower 6 bits + let quantity = mapping & ((1 << 6) - 1); + if mapping & (1 << 7) != 0 { + // shift + word >>= quantity as u64; + } else { + word = word.rotate_left(quantity as u32); + } + word + }; + (word & (1 << (needle % 64) as u64)) != 0 +} + +#[repr(transparent)] +struct ShortOffsetRunHeader(u32); + +impl ShortOffsetRunHeader { + const fn new(start_index: usize, prefix_sum: u32) -> Self { + assert!(start_index < (1 << 11)); + assert!(prefix_sum < (1 << 21)); + + Self((start_index as u32) << 21 | prefix_sum) + } + + #[inline] + const fn start_index(&self) -> usize { + (self.0 >> 21) as usize + } + + #[inline] + const fn prefix_sum(&self) -> u32 { + self.0 & ((1 << 21) - 1) + } +} + +/// # Safety +/// +/// - The last element of `short_offset_runs` must be greater than `std::char::MAX`. +/// - The start indices of all elements in `short_offset_runs` must be less than `OFFSETS`. +#[inline(always)] +unsafe fn skip_search( + needle: char, + short_offset_runs: &[ShortOffsetRunHeader; SOR], + offsets: &[u8; OFFSETS], +) -> bool { + let needle = needle as u32; + + let last_idx = + match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header.0 << 11) { + Ok(idx) => idx + 1, + Err(idx) => idx, + }; + // SAFETY: `last_idx` *cannot* be past the end of the array, as the last + // element is greater than `std::char::MAX` (the largest possible needle) + // as guaranteed by the caller. + // + // So, we cannot have found it (i.e. 
`Ok(idx) => idx + 1 != length`) and the + // correct location cannot be past it, so `Err(idx) => idx != length` either. + // + // This means that we can avoid bounds checking for the accesses below, too. + // + // We need to use `intrinsics::assume` since the `panic_nounwind` contained + // in `hint::assert_unchecked` may not be optimized out. + unsafe { crate::intrinsics::assume(last_idx < SOR) }; + + let mut offset_idx = short_offset_runs[last_idx].start_index(); + let length = if let Some(next) = short_offset_runs.get(last_idx + 1) { + (*next).start_index() - offset_idx + } else { + offsets.len() - offset_idx + }; + + let prev = + last_idx.checked_sub(1).map(|prev| short_offset_runs[prev].prefix_sum()).unwrap_or(0); + + let total = needle - prev; + let mut prefix_sum = 0; + for _ in 0..(length - 1) { + // SAFETY: It is guaranteed that `length <= OFFSETS - offset_idx`, + // so it follows that `length - 1 + offset_idx < OFFSETS`, therefore + // `offset_idx < OFFSETS` is always true in this loop. + // + // We need to use `intrinsics::assume` since the `panic_nounwind` contained + // in `hint::assert_unchecked` may not be optimized out. + unsafe { crate::intrinsics::assume(offset_idx < OFFSETS) }; + let offset = offsets[offset_idx]; + prefix_sum += offset as u32; + if prefix_sum > total { + break; + } + offset_idx += 1; + } + offset_idx % 2 == 1 +} pub const UNICODE_VERSION: (u8, u8, u8) = (17, 0, 0); +#[rustfmt::skip] pub mod alphabetic { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 51] = [ - ShortOffsetRunHeader::new(0, 706), - ShortOffsetRunHeader::new(12, 4681), - ShortOffsetRunHeader::new(414, 5741), - ShortOffsetRunHeader::new(452, 7958), - ShortOffsetRunHeader::new(552, 9398), - ShortOffsetRunHeader::new(623, 11264), - ShortOffsetRunHeader::new(625, 12293), - ShortOffsetRunHeader::new(663, 13312), - ShortOffsetRunHeader::new(687, 19904), - ShortOffsetRunHeader::new(688, 42125), - ShortOffsetRunHeader::new(690, 42509), - ShortOffsetRunHeader::new(694, 55204), - ShortOffsetRunHeader::new(778, 63744), - ShortOffsetRunHeader::new(783, 64110), - ShortOffsetRunHeader::new(784, 64830), - ShortOffsetRunHeader::new(806, 66176), - ShortOffsetRunHeader::new(847, 67383), - ShortOffsetRunHeader::new(894, 73440), - ShortOffsetRunHeader::new(1217, 74650), - ShortOffsetRunHeader::new(1228, 77712), - ShortOffsetRunHeader::new(1233, 78896), - ShortOffsetRunHeader::new(1236, 82939), - ShortOffsetRunHeader::new(1240, 83527), - ShortOffsetRunHeader::new(1242, 90368), - ShortOffsetRunHeader::new(1243, 92160), - ShortOffsetRunHeader::new(1245, 92729), - ShortOffsetRunHeader::new(1246, 93504), - ShortOffsetRunHeader::new(1261, 101590), - ShortOffsetRunHeader::new(1282, 110576), - ShortOffsetRunHeader::new(1287, 110883), - ShortOffsetRunHeader::new(1294, 111356), - ShortOffsetRunHeader::new(1304, 113664), - ShortOffsetRunHeader::new(1305, 119808), - ShortOffsetRunHeader::new(1315, 120486), - ShortOffsetRunHeader::new(1352, 122624), - ShortOffsetRunHeader::new(1375, 123536), - ShortOffsetRunHeader::new(1399, 124112), - ShortOffsetRunHeader::new(1403, 126464), - ShortOffsetRunHeader::new(1431, 127280), - ShortOffsetRunHeader::new(1497, 131072), - ShortOffsetRunHeader::new(1503, 173792), - ShortOffsetRunHeader::new(1504, 178206), - ShortOffsetRunHeader::new(1506, 183982), - ShortOffsetRunHeader::new(1508, 191457), - ShortOffsetRunHeader::new(1510, 192094), - ShortOffsetRunHeader::new(1512, 194560), - ShortOffsetRunHeader::new(1513, 195102), - ShortOffsetRunHeader::new(1514, 
196608), - ShortOffsetRunHeader::new(1515, 201547), - ShortOffsetRunHeader::new(1516, 210042), + ShortOffsetRunHeader::new(0, 706), ShortOffsetRunHeader::new(12, 4681), + ShortOffsetRunHeader::new(414, 5741), ShortOffsetRunHeader::new(452, 7958), + ShortOffsetRunHeader::new(552, 9398), ShortOffsetRunHeader::new(623, 11264), + ShortOffsetRunHeader::new(625, 12293), ShortOffsetRunHeader::new(663, 13312), + ShortOffsetRunHeader::new(687, 19904), ShortOffsetRunHeader::new(688, 42125), + ShortOffsetRunHeader::new(690, 42509), ShortOffsetRunHeader::new(694, 55204), + ShortOffsetRunHeader::new(778, 63744), ShortOffsetRunHeader::new(783, 64110), + ShortOffsetRunHeader::new(784, 64830), ShortOffsetRunHeader::new(806, 66176), + ShortOffsetRunHeader::new(847, 67383), ShortOffsetRunHeader::new(894, 73440), + ShortOffsetRunHeader::new(1217, 74650), ShortOffsetRunHeader::new(1228, 77712), + ShortOffsetRunHeader::new(1233, 78896), ShortOffsetRunHeader::new(1236, 82939), + ShortOffsetRunHeader::new(1240, 83527), ShortOffsetRunHeader::new(1242, 90368), + ShortOffsetRunHeader::new(1243, 92160), ShortOffsetRunHeader::new(1245, 92729), + ShortOffsetRunHeader::new(1246, 93504), ShortOffsetRunHeader::new(1261, 101590), + ShortOffsetRunHeader::new(1282, 110576), ShortOffsetRunHeader::new(1287, 110883), + ShortOffsetRunHeader::new(1294, 111356), ShortOffsetRunHeader::new(1304, 113664), + ShortOffsetRunHeader::new(1305, 119808), ShortOffsetRunHeader::new(1315, 120486), + ShortOffsetRunHeader::new(1352, 122624), ShortOffsetRunHeader::new(1375, 123536), + ShortOffsetRunHeader::new(1399, 124112), ShortOffsetRunHeader::new(1403, 126464), + ShortOffsetRunHeader::new(1431, 127280), ShortOffsetRunHeader::new(1497, 131072), + ShortOffsetRunHeader::new(1503, 173792), ShortOffsetRunHeader::new(1504, 178206), + ShortOffsetRunHeader::new(1506, 183982), ShortOffsetRunHeader::new(1508, 191457), + ShortOffsetRunHeader::new(1510, 192094), ShortOffsetRunHeader::new(1512, 194560), + ShortOffsetRunHeader::new(1513, 195102), ShortOffsetRunHeader::new(1514, 196608), + ShortOffsetRunHeader::new(1515, 201547), ShortOffsetRunHeader::new(1516, 210042), ShortOffsetRunHeader::new(1518, 1324154), ]; static OFFSETS: [u8; 1519] = [ @@ -77,60 +180,58 @@ pub mod alphabetic { 1, 2, 1, 2, 1, 1, 8, 27, 4, 4, 29, 11, 5, 56, 1, 7, 14, 102, 1, 8, 4, 8, 4, 3, 10, 3, 2, 1, 16, 48, 13, 101, 24, 33, 9, 2, 4, 1, 5, 24, 2, 19, 19, 25, 7, 11, 5, 24, 1, 7, 7, 1, 8, 42, 10, 12, 3, 7, 6, 76, 1, 16, 1, 3, 4, 15, 13, 19, 1, 8, 2, 2, 2, 22, 1, 7, 1, 1, 3, 4, 3, 8, - 2, 2, 2, 2, 1, 1, 8, 1, 4, 2, 1, 5, 12, 2, 10, 1, 4, 3, 1, 6, 4, 2, 2, 22, 1, 7, 1, 2, 1, - 2, 1, 2, 4, 5, 4, 2, 2, 2, 4, 1, 7, 4, 1, 1, 17, 6, 11, 3, 1, 9, 1, 3, 1, 22, 1, 7, 1, 2, - 1, 5, 3, 9, 1, 3, 1, 2, 3, 1, 15, 4, 21, 4, 4, 3, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, - 8, 2, 2, 2, 2, 9, 2, 4, 2, 1, 5, 13, 1, 16, 2, 1, 6, 3, 3, 1, 4, 3, 2, 1, 1, 1, 2, 3, 2, 3, - 3, 3, 12, 4, 5, 3, 3, 1, 3, 3, 1, 6, 1, 40, 13, 1, 3, 1, 23, 1, 16, 3, 8, 1, 3, 1, 3, 8, 2, - 1, 3, 1, 2, 2, 4, 28, 4, 1, 8, 1, 3, 1, 23, 1, 10, 1, 5, 3, 8, 1, 3, 1, 3, 8, 2, 5, 3, 1, - 4, 13, 3, 12, 13, 1, 3, 1, 41, 2, 8, 1, 3, 1, 3, 1, 1, 5, 4, 7, 5, 22, 6, 1, 3, 1, 18, 3, - 24, 1, 9, 1, 1, 2, 7, 8, 6, 1, 1, 1, 8, 18, 2, 13, 58, 5, 7, 6, 1, 51, 2, 1, 1, 1, 5, 1, - 24, 1, 1, 1, 19, 1, 3, 2, 5, 1, 1, 6, 1, 14, 4, 32, 1, 63, 8, 1, 36, 4, 19, 4, 16, 1, 36, - 67, 55, 1, 1, 2, 5, 16, 64, 10, 4, 2, 38, 1, 1, 5, 1, 2, 43, 1, 0, 1, 4, 2, 7, 1, 1, 1, 4, - 2, 41, 1, 4, 2, 33, 1, 4, 2, 7, 1, 1, 1, 4, 2, 15, 1, 57, 1, 4, 2, 67, 37, 16, 16, 86, 2, - 
6, 3, 0, 2, 17, 1, 26, 5, 75, 3, 11, 7, 20, 11, 21, 12, 20, 12, 13, 1, 3, 1, 2, 12, 52, 2, - 19, 14, 1, 4, 1, 67, 89, 7, 43, 5, 70, 10, 31, 1, 12, 4, 9, 23, 30, 2, 5, 11, 44, 4, 26, - 54, 28, 4, 63, 2, 20, 50, 1, 23, 2, 11, 3, 49, 52, 1, 15, 1, 8, 51, 42, 2, 4, 10, 44, 1, - 11, 14, 55, 22, 3, 10, 36, 2, 11, 5, 43, 2, 3, 41, 4, 1, 6, 1, 2, 3, 1, 5, 192, 19, 34, 11, - 0, 2, 6, 2, 38, 2, 6, 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, - 2, 6, 4, 13, 5, 3, 1, 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, - 1, 1, 1, 1, 4, 1, 11, 2, 4, 5, 5, 4, 1, 17, 41, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, - 1, 2, 56, 7, 1, 16, 23, 9, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 32, 47, 1, 0, 3, - 25, 9, 7, 5, 2, 5, 4, 86, 6, 3, 1, 90, 1, 4, 5, 43, 1, 94, 17, 32, 48, 16, 0, 0, 64, 0, 67, - 46, 2, 0, 3, 16, 10, 2, 20, 47, 5, 8, 3, 113, 39, 9, 2, 103, 2, 82, 20, 21, 1, 33, 24, 52, - 12, 68, 1, 1, 44, 6, 3, 1, 1, 3, 10, 33, 5, 35, 13, 29, 3, 51, 1, 12, 15, 1, 16, 16, 10, 5, - 1, 55, 9, 14, 18, 23, 3, 69, 1, 1, 1, 1, 24, 3, 2, 16, 2, 4, 11, 6, 2, 6, 2, 6, 9, 7, 1, 7, - 1, 43, 1, 14, 6, 123, 21, 0, 12, 23, 4, 49, 0, 0, 2, 106, 38, 7, 12, 5, 5, 12, 1, 13, 1, 5, - 1, 1, 1, 2, 1, 2, 1, 108, 33, 0, 18, 64, 2, 54, 40, 12, 116, 5, 1, 135, 36, 26, 6, 26, 11, - 89, 3, 6, 2, 6, 2, 6, 2, 3, 35, 12, 1, 26, 1, 19, 1, 2, 1, 15, 2, 14, 34, 123, 69, 53, 0, - 29, 3, 49, 47, 32, 13, 30, 5, 43, 5, 30, 2, 36, 4, 8, 1, 5, 42, 158, 18, 36, 4, 36, 4, 40, - 8, 52, 12, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 3, 52, 12, 0, 9, 22, 10, 8, 24, - 6, 1, 42, 1, 9, 69, 6, 2, 1, 1, 44, 1, 2, 3, 1, 2, 23, 10, 23, 9, 31, 65, 19, 1, 2, 10, 22, - 10, 26, 6, 26, 38, 56, 6, 2, 64, 4, 1, 2, 5, 8, 1, 3, 1, 29, 42, 29, 3, 29, 35, 8, 1, 28, - 27, 54, 10, 22, 10, 19, 13, 18, 110, 73, 55, 51, 13, 51, 13, 40, 34, 28, 3, 1, 5, 23, 250, - 42, 1, 2, 3, 2, 16, 6, 50, 3, 3, 29, 10, 1, 8, 22, 42, 18, 46, 21, 27, 23, 9, 70, 43, 5, - 10, 57, 9, 1, 13, 25, 23, 51, 17, 4, 8, 35, 3, 1, 9, 64, 1, 4, 9, 2, 10, 1, 1, 1, 35, 18, - 1, 34, 2, 1, 6, 4, 62, 7, 1, 1, 1, 4, 1, 15, 1, 10, 7, 57, 23, 4, 1, 8, 2, 2, 2, 22, 1, 7, - 1, 2, 1, 5, 3, 8, 2, 2, 2, 2, 3, 1, 6, 1, 5, 7, 28, 10, 1, 1, 2, 1, 1, 38, 1, 10, 1, 1, 2, - 1, 1, 4, 1, 2, 3, 1, 1, 1, 44, 66, 1, 3, 1, 4, 20, 3, 30, 66, 2, 2, 1, 1, 184, 54, 2, 7, - 25, 6, 34, 63, 1, 1, 3, 1, 59, 54, 2, 1, 71, 27, 2, 14, 21, 7, 185, 57, 103, 64, 31, 8, 2, - 1, 2, 8, 1, 2, 1, 30, 1, 2, 2, 2, 2, 4, 93, 8, 2, 46, 2, 6, 1, 1, 1, 2, 27, 51, 2, 10, 17, - 72, 5, 1, 18, 73, 103, 8, 88, 33, 31, 9, 1, 45, 1, 7, 1, 1, 49, 30, 2, 22, 1, 14, 73, 7, 1, - 2, 1, 44, 3, 1, 1, 2, 1, 3, 1, 1, 2, 2, 24, 6, 1, 2, 1, 37, 1, 2, 1, 4, 1, 1, 23, 44, 0, - 23, 9, 17, 1, 41, 3, 3, 111, 1, 79, 0, 102, 111, 17, 196, 0, 97, 15, 0, 17, 6, 25, 0, 5, 0, - 0, 47, 0, 0, 7, 31, 17, 79, 17, 30, 18, 48, 16, 4, 31, 21, 5, 19, 0, 45, 211, 64, 32, 25, - 2, 25, 44, 75, 4, 57, 7, 17, 64, 2, 1, 1, 12, 7, 9, 0, 41, 32, 97, 115, 0, 4, 1, 7, 1, 2, - 1, 0, 15, 1, 29, 3, 2, 1, 14, 4, 8, 0, 0, 107, 5, 13, 3, 9, 7, 10, 4, 1, 0, 85, 1, 71, 1, - 2, 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, - 3, 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, - 31, 6, 6, 213, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 112, 45, 10, 7, 16, 1, 0, 30, 18, - 44, 0, 28, 228, 30, 2, 1, 207, 31, 1, 22, 8, 2, 224, 7, 1, 4, 1, 2, 1, 15, 1, 197, 59, 68, - 3, 1, 3, 1, 0, 4, 1, 27, 1, 2, 1, 1, 2, 1, 1, 10, 1, 4, 1, 1, 1, 1, 6, 1, 4, 1, 1, 1, 1, 1, - 1, 3, 1, 2, 1, 1, 2, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 4, 1, 7, 1, 4, 1, 4, 1, 1, - 1, 10, 1, 17, 5, 3, 1, 5, 1, 17, 0, 26, 6, 26, 6, 26, 0, 0, 32, 0, 2, 0, 2, 0, 15, 0, 0, 0, - 0, 0, 5, 0, 0, + 2, 2, 2, 2, 1, 1, 8, 1, 4, 2, 1, 5, 12, 2, 10, 1, 4, 3, 1, 6, 4, 2, 2, 22, 1, 7, 1, 2, 1, 2, + 1, 2, 4, 5, 4, 2, 2, 2, 4, 1, 7, 4, 1, 1, 17, 6, 11, 3, 1, 9, 1, 3, 1, 22, 1, 7, 1, 2, 1, 5, + 3, 9, 1, 3, 1, 2, 3, 1, 15, 4, 21, 4, 4, 3, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8, 2, 2, + 2, 2, 9, 2, 4, 2, 1, 5, 13, 1, 16, 2, 1, 6, 3, 3, 1, 4, 3, 2, 1, 1, 1, 2, 3, 2, 3, 3, 3, 12, + 4, 5, 3, 3, 1, 3, 3, 1, 6, 1, 40, 13, 1, 3, 1, 23, 1, 16, 3, 8, 1, 3, 1, 3, 8, 2, 1, 3, 1, + 2, 2, 4, 28, 4, 1, 8, 1, 3, 1, 23, 1, 10, 1, 5, 3, 8, 1, 3, 1, 3, 8, 2, 5, 3, 1, 4, 13, 3, + 12, 13, 1, 3, 1, 41, 2, 8, 1, 3, 1, 3, 1, 1, 5, 4, 7, 5, 22, 6, 1, 3, 1, 18, 3, 24, 1, 9, 1, + 1, 2, 7, 8, 6, 1, 1, 1, 8, 18, 2, 13, 58, 5, 7, 6, 1, 51, 2, 1, 1, 1, 5, 1, 24, 1, 1, 1, 19, + 1, 3, 2, 5, 1, 1, 6, 1, 14, 4, 32, 1, 63, 8, 1, 36, 4, 19, 4, 16, 1, 36, 67, 55, 1, 1, 2, 5, + 16, 64, 10, 4, 2, 38, 1, 1, 5, 1, 2, 43, 1, 0, 1, 4, 2, 7, 1, 1, 1, 4, 2, 41, 1, 4, 2, 33, + 1, 4, 2, 7, 1, 1, 1, 4, 2, 15, 1, 57, 1, 4, 2, 67, 37, 16, 16, 86, 2, 6, 3, 0, 2, 17, 1, 26, + 5, 75, 3, 11, 7, 20, 11, 21, 12, 20, 12, 13, 1, 3, 1, 2, 12, 52, 2, 19, 14, 1, 4, 1, 67, 89, + 7, 43, 5, 70, 10, 31, 1, 12, 4, 9, 23, 30, 2, 5, 11, 44, 4, 26, 54, 28, 4, 63, 2, 20, 50, 1, + 23, 2, 11, 3, 49, 52, 1, 15, 1, 8, 51, 42, 2, 4, 10, 44, 1, 11, 14, 55, 22, 3, 10, 36, 2, + 11, 5, 43, 2, 3, 41, 4, 1, 6, 1, 2, 3, 1, 5, 192, 19, 34, 11, 0, 2, 6, 2, 38, 2, 6, 2, 8, 1, + 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116, 1, + 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 11, 2, 4, 5, 5, + 4, 1, 17, 41, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 2, 56, 7, 1, 16, 23, 9, 7, 1, + 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 32, 47, 1, 0, 3, 25, 9, 7, 5, 2, 5, 4, 86, 6, 3, + 1, 90, 1, 4, 5, 43, 1, 94, 17, 32, 48, 16, 0, 0, 64, 0, 67, 46, 2, 0, 3, 16, 10, 2, 20, 47, + 5, 8, 3, 113, 39, 9, 2, 103, 2, 82, 20, 21, 1, 33, 24, 52, 12, 68, 1, 1, 44, 6, 3, 1, 1, 3, + 10, 33, 5, 35, 13, 29, 3, 51, 1, 12, 15, 1, 16, 16, 10, 5, 1, 55, 9, 14, 18, 23, 3, 69, 1, + 1, 1, 1, 24, 3, 2, 16, 2, 4, 11, 6, 2, 6, 2, 6, 9, 7, 1, 7, 1, 43, 1, 14, 6, 123, 21, 0, 12, + 23, 4, 49, 0, 0, 2, 106, 38, 7, 12, 5, 5, 12, 1, 13, 1, 5, 1, 1, 1, 2, 1, 2, 1, 108, 33, 0, + 18, 64, 2, 54, 40, 12, 116, 5, 1, 135, 36, 26, 6, 26, 11, 89, 3, 6, 2, 6, 2, 6, 2, 3, 35, + 12, 1, 26, 1, 19, 1, 2, 1, 15, 2, 14, 34, 123, 69, 53, 0, 29, 3, 49, 47, 32, 13, 30, 5, 43, + 5, 30, 2, 36, 4, 8, 1, 5, 42, 158, 18, 36, 4, 36, 4, 40, 8, 52, 12, 11, 1, 15, 1, 7, 1, 2, + 1, 11, 1, 15, 1, 7, 1, 2, 3, 52, 12, 0, 9, 22, 10, 8, 24, 6, 1, 42, 1, 9, 69, 6, 2, 1, 1, + 44, 1, 2, 3, 1, 2, 23, 10, 23, 9, 31, 65, 19, 1, 2, 10, 22, 10, 26, 6, 26, 38, 56, 6, 2, 64, + 4, 1, 2, 5, 8, 1, 3, 1, 29, 42, 29, 3, 29, 35, 8, 1, 28, 27, 54, 10, 22, 10, 19, 13, 18, + 110, 73, 55, 51, 13, 51, 13, 40, 34, 28, 3, 1, 5, 23, 250, 42, 1, 2, 3, 2, 16, 6, 50, 3, 3, + 29, 10, 1, 8, 22, 42, 18, 46, 21, 27, 23, 9, 70, 43, 5, 10, 57, 9, 1, 13, 25, 23, 51, 17, 4, + 8, 35, 3, 1, 9, 64, 1, 4, 9, 2, 10, 1, 1, 1, 35, 18, 1, 34, 2, 1, 6, 4, 62, 7, 1, 1, 1, 4, + 1, 15, 1, 10, 7, 57, 23, 4, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8, 2, 2, 2, 2, 3, 1, 6, + 1, 5, 7, 28, 10, 1, 1, 2, 1, 1, 38, 1, 10, 1, 1, 2, 1, 1, 4, 1, 2, 3, 1, 1, 1, 44, 66, 1, 3, + 1, 4, 20, 3, 30, 66, 2, 2, 1, 1, 184, 54, 2, 7, 25, 6, 34, 63, 1, 1, 3, 1, 59, 54, 2, 1, 
71, + 27, 2, 14, 21, 7, 185, 57, 103, 64, 31, 8, 2, 1, 2, 8, 1, 2, 1, 30, 1, 2, 2, 2, 2, 4, 93, 8, + 2, 46, 2, 6, 1, 1, 1, 2, 27, 51, 2, 10, 17, 72, 5, 1, 18, 73, 103, 8, 88, 33, 31, 9, 1, 45, + 1, 7, 1, 1, 49, 30, 2, 22, 1, 14, 73, 7, 1, 2, 1, 44, 3, 1, 1, 2, 1, 3, 1, 1, 2, 2, 24, 6, + 1, 2, 1, 37, 1, 2, 1, 4, 1, 1, 23, 44, 0, 23, 9, 17, 1, 41, 3, 3, 111, 1, 79, 0, 102, 111, + 17, 196, 0, 97, 15, 0, 17, 6, 25, 0, 5, 0, 0, 47, 0, 0, 7, 31, 17, 79, 17, 30, 18, 48, 16, + 4, 31, 21, 5, 19, 0, 45, 211, 64, 32, 25, 2, 25, 44, 75, 4, 57, 7, 17, 64, 2, 1, 1, 12, 7, + 9, 0, 41, 32, 97, 115, 0, 4, 1, 7, 1, 2, 1, 0, 15, 1, 29, 3, 2, 1, 14, 4, 8, 0, 0, 107, 5, + 13, 3, 9, 7, 10, 4, 1, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, + 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, + 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 31, 6, 6, 213, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, + 1, 112, 45, 10, 7, 16, 1, 0, 30, 18, 44, 0, 28, 228, 30, 2, 1, 207, 31, 1, 22, 8, 2, 224, 7, + 1, 4, 1, 2, 1, 15, 1, 197, 59, 68, 3, 1, 3, 1, 0, 4, 1, 27, 1, 2, 1, 1, 2, 1, 1, 10, 1, 4, + 1, 1, 1, 1, 6, 1, 4, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, + 1, 2, 4, 1, 7, 1, 4, 1, 4, 1, 1, 1, 10, 1, 17, 5, 3, 1, 5, 1, 17, 0, 26, 6, 26, 6, 26, 0, 0, + 32, 0, 2, 0, 2, 0, 15, 0, 0, 0, 0, 0, 5, 0, 0, ]; - #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -153,84 +254,66 @@ pub mod alphabetic { } } +#[rustfmt::skip] pub mod case_ignorable { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 36] = [ - ShortOffsetRunHeader::new(0, 688), - ShortOffsetRunHeader::new(11, 4957), - ShortOffsetRunHeader::new(263, 5906), - ShortOffsetRunHeader::new(265, 8125), - ShortOffsetRunHeader::new(377, 11388), - ShortOffsetRunHeader::new(411, 12293), - ShortOffsetRunHeader::new(423, 40981), - ShortOffsetRunHeader::new(435, 42232), - ShortOffsetRunHeader::new(437, 42508), - ShortOffsetRunHeader::new(439, 64286), - ShortOffsetRunHeader::new(535, 65024), - ShortOffsetRunHeader::new(539, 66045), - ShortOffsetRunHeader::new(569, 67456), - ShortOffsetRunHeader::new(575, 68097), - ShortOffsetRunHeader::new(581, 68900), - ShortOffsetRunHeader::new(593, 69291), - ShortOffsetRunHeader::new(601, 71727), - ShortOffsetRunHeader::new(727, 71995), - ShortOffsetRunHeader::new(731, 73459), - ShortOffsetRunHeader::new(797, 78896), - ShortOffsetRunHeader::new(809, 90398), - ShortOffsetRunHeader::new(813, 92912), - ShortOffsetRunHeader::new(817, 93504), - ShortOffsetRunHeader::new(823, 94031), - ShortOffsetRunHeader::new(827, 110576), - ShortOffsetRunHeader::new(837, 113821), - ShortOffsetRunHeader::new(843, 118528), - ShortOffsetRunHeader::new(847, 119143), - ShortOffsetRunHeader::new(851, 121344), - ShortOffsetRunHeader::new(861, 122880), - ShortOffsetRunHeader::new(873, 123566), - ShortOffsetRunHeader::new(889, 124139), - ShortOffsetRunHeader::new(893, 125136), - ShortOffsetRunHeader::new(907, 127995), - ShortOffsetRunHeader::new(911, 917505), - ShortOffsetRunHeader::new(913, 2032112), + ShortOffsetRunHeader::new(0, 688), ShortOffsetRunHeader::new(11, 4957), + ShortOffsetRunHeader::new(263, 5906), ShortOffsetRunHeader::new(265, 8125), + ShortOffsetRunHeader::new(377, 11388), ShortOffsetRunHeader::new(411, 12293), + ShortOffsetRunHeader::new(423, 40981), ShortOffsetRunHeader::new(435, 42232), + ShortOffsetRunHeader::new(437, 42508), ShortOffsetRunHeader::new(439, 64286), + ShortOffsetRunHeader::new(535, 65024), 
ShortOffsetRunHeader::new(539, 66045), + ShortOffsetRunHeader::new(569, 67456), ShortOffsetRunHeader::new(575, 68097), + ShortOffsetRunHeader::new(581, 68900), ShortOffsetRunHeader::new(593, 69291), + ShortOffsetRunHeader::new(601, 71727), ShortOffsetRunHeader::new(727, 71995), + ShortOffsetRunHeader::new(731, 73459), ShortOffsetRunHeader::new(797, 78896), + ShortOffsetRunHeader::new(809, 90398), ShortOffsetRunHeader::new(813, 92912), + ShortOffsetRunHeader::new(817, 93504), ShortOffsetRunHeader::new(823, 94031), + ShortOffsetRunHeader::new(827, 110576), ShortOffsetRunHeader::new(837, 113821), + ShortOffsetRunHeader::new(843, 118528), ShortOffsetRunHeader::new(847, 119143), + ShortOffsetRunHeader::new(851, 121344), ShortOffsetRunHeader::new(861, 122880), + ShortOffsetRunHeader::new(873, 123566), ShortOffsetRunHeader::new(889, 124139), + ShortOffsetRunHeader::new(893, 125136), ShortOffsetRunHeader::new(907, 127995), + ShortOffsetRunHeader::new(911, 917505), ShortOffsetRunHeader::new(913, 2032112), ]; static OFFSETS: [u8; 919] = [ 168, 1, 4, 1, 1, 1, 4, 1, 2, 2, 0, 192, 4, 2, 4, 1, 9, 2, 1, 1, 251, 7, 207, 1, 5, 1, 49, - 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35, 1, 10, 21, 16, 1, 101, 8, 1, - 10, 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24, 24, 43, 3, 44, 1, 7, 2, 5, - 9, 41, 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1, 58, 1, 4, 4, 8, 1, 20, 2, - 26, 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, - 20, 2, 22, 6, 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1, 61, 1, 12, 1, 50, 1, 3, 1, - 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1, 5, 2, 20, 2, 28, 2, 57, 2, - 4, 4, 8, 1, 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9, 98, 1, 2, 9, 9, 1, 1, 7, - 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, - 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3, 29, 2, 30, 2, 30, 2, 64, - 2, 1, 7, 8, 1, 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118, 3, 4, 2, 9, 1, 6, 3, - 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 46, 2, 12, 20, 4, 48, - 1, 1, 5, 1, 1, 5, 1, 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3, 58, 8, 2, 2, - 64, 6, 82, 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1, 3, 11, 3, 13, - 3, 13, 3, 13, 2, 12, 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1, 16, 13, 51, 33, - 0, 2, 113, 3, 125, 1, 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6, 93, 3, 0, 1, 0, - 6, 0, 1, 98, 4, 1, 10, 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 102, 4, 3, 2, 8, 1, 3, 1, - 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4, 2, 2, 17, 1, - 21, 2, 66, 6, 2, 2, 2, 2, 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 27, 1, - 14, 2, 5, 2, 1, 1, 100, 5, 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3, 1, 12, 16, - 34, 1, 2, 1, 169, 1, 7, 1, 6, 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3, 0, 1, 226, - 1, 149, 5, 0, 6, 1, 42, 1, 9, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 38, 1, 26, 5, 1, - 1, 0, 2, 24, 1, 52, 6, 70, 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, - 1, 4, 1, 10, 1, 50, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, - 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 3, 1, 37, 7, 3, 5, 70, 6, 13, 1, 1, 1, 1, 1, 14, 2, - 85, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, 1, 4, 2, 1, 2, 238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, - 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 0, 2, 1, 1, 4, 1, - 144, 4, 2, 
2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, 3, 1, - 1, 201, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, - 1, 1, 1, 65, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, 1, 23, 1, 0, 17, 6, 15, 0, 12, 3, 3, 0, 5, 59, - 7, 9, 4, 0, 3, 40, 2, 0, 1, 63, 17, 64, 2, 1, 2, 13, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, - 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3, 0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, - 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160, 14, 0, 1, 61, 4, 0, 5, 254, 2, 243, 1, 2, 1, 7, - 2, 5, 1, 9, 1, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0, + 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35, 1, 10, 21, 16, 1, 101, 8, 1, 10, + 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24, 24, 43, 3, 44, 1, 7, 2, 5, 9, 41, + 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1, 58, 1, 4, 4, 8, 1, 20, 2, 26, 1, 2, + 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, + 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1, 61, 1, 12, 1, 50, 1, 3, 1, 55, 1, 1, 3, + 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1, 5, 2, 20, 2, 28, 2, 57, 2, 4, 4, 8, 1, + 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9, 98, 1, 2, 9, 9, 1, 1, 7, 73, 2, 27, 1, + 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, + 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3, 29, 2, 30, 2, 30, 2, 64, 2, 1, 7, 8, 1, + 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118, 3, 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58, + 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 46, 2, 12, 20, 4, 48, 1, 1, 5, 1, 1, 5, 1, + 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3, 58, 8, 2, 2, 64, 6, 82, 3, 1, 13, + 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1, 3, 11, 3, 13, 3, 13, 3, 13, 2, 12, + 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1, 16, 13, 51, 33, 0, 2, 113, 3, 125, 1, + 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6, 93, 3, 0, 1, 0, 6, 0, 1, 98, 4, 1, 10, + 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 102, 4, 3, 2, 8, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, + 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4, 2, 2, 17, 1, 21, 2, 66, 6, 2, 2, 2, 2, + 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 27, 1, 14, 2, 5, 2, 1, 1, 100, 5, + 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3, 1, 12, 16, 34, 1, 2, 1, 169, 1, 7, 1, 6, + 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3, 0, 1, 226, 1, 149, 5, 0, 6, 1, 42, 1, 9, + 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 38, 1, 26, 5, 1, 1, 0, 2, 24, 1, 52, 6, 70, 11, + 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, 1, 4, 1, 10, 1, 50, 3, 36, 5, 1, + 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, + 2, 3, 1, 37, 7, 3, 5, 70, 6, 13, 1, 1, 1, 1, 1, 14, 2, 85, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, + 1, 4, 2, 1, 2, 238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, + 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 0, 2, 1, 1, 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, + 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, 3, 1, 1, 201, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, + 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, 1, 1, 1, 65, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, + 1, 23, 1, 0, 17, 6, 15, 0, 12, 3, 3, 0, 5, 59, 7, 9, 4, 0, 3, 40, 2, 0, 1, 63, 17, 64, 2, 1, + 2, 13, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3, 0, + 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160, 14, + 0, 1, 61, 4, 0, 5, 254, 2, 
243, 1, 2, 1, 7, 2, 5, 1, 9, 1, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, + 128, 240, 0, ]; - #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -253,48 +336,37 @@ pub mod case_ignorable { } } +#[rustfmt::skip] pub mod cased { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 22] = [ - ShortOffsetRunHeader::new(0, 4256), - ShortOffsetRunHeader::new(51, 5024), - ShortOffsetRunHeader::new(61, 7296), - ShortOffsetRunHeader::new(65, 7958), - ShortOffsetRunHeader::new(74, 9398), - ShortOffsetRunHeader::new(149, 11264), - ShortOffsetRunHeader::new(151, 42560), - ShortOffsetRunHeader::new(163, 43824), - ShortOffsetRunHeader::new(177, 64256), - ShortOffsetRunHeader::new(183, 65313), - ShortOffsetRunHeader::new(187, 66560), - ShortOffsetRunHeader::new(191, 67456), - ShortOffsetRunHeader::new(213, 68736), - ShortOffsetRunHeader::new(221, 71840), - ShortOffsetRunHeader::new(229, 93760), - ShortOffsetRunHeader::new(231, 119808), - ShortOffsetRunHeader::new(237, 120486), - ShortOffsetRunHeader::new(274, 122624), - ShortOffsetRunHeader::new(297, 122928), - ShortOffsetRunHeader::new(303, 125184), - ShortOffsetRunHeader::new(305, 127280), - ShortOffsetRunHeader::new(307, 1241482), + ShortOffsetRunHeader::new(0, 4256), ShortOffsetRunHeader::new(51, 5024), + ShortOffsetRunHeader::new(61, 7296), ShortOffsetRunHeader::new(65, 7958), + ShortOffsetRunHeader::new(74, 9398), ShortOffsetRunHeader::new(149, 11264), + ShortOffsetRunHeader::new(151, 42560), ShortOffsetRunHeader::new(163, 43824), + ShortOffsetRunHeader::new(177, 64256), ShortOffsetRunHeader::new(183, 65313), + ShortOffsetRunHeader::new(187, 66560), ShortOffsetRunHeader::new(191, 67456), + ShortOffsetRunHeader::new(213, 68736), ShortOffsetRunHeader::new(221, 71840), + ShortOffsetRunHeader::new(229, 93760), ShortOffsetRunHeader::new(231, 119808), + ShortOffsetRunHeader::new(237, 120486), ShortOffsetRunHeader::new(274, 122624), + ShortOffsetRunHeader::new(297, 122928), ShortOffsetRunHeader::new(303, 125184), + ShortOffsetRunHeader::new(305, 127280), ShortOffsetRunHeader::new(307, 1241482), ]; static OFFSETS: [u8; 313] = [ 170, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 2, 35, 7, 2, 30, 5, 96, 1, 42, 4, - 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9, 41, 0, 38, 1, - 1, 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2, 38, 2, 6, - 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, - 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 6, 4, - 1, 2, 4, 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 0, 46, - 18, 30, 132, 102, 3, 4, 1, 77, 20, 6, 1, 3, 0, 43, 1, 14, 6, 80, 0, 7, 12, 5, 0, 26, 6, 26, - 0, 80, 96, 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 0, 1, 2, 3, 1, - 42, 1, 9, 0, 51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 32, 25, 2, 25, 0, 85, 1, 71, 1, 2, - 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, - 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, - 1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0, + 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9, 41, 0, 38, 1, 1, + 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2, 38, 2, 6, 2, 8, + 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116, + 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 
1, 1, 1, 1, 1, 1, 4, 1, 6, 4, 1, 2, 4, + 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 0, 46, 18, 30, 132, + 102, 3, 4, 1, 77, 20, 6, 1, 3, 0, 43, 1, 14, 6, 80, 0, 7, 12, 5, 0, 26, 6, 26, 0, 80, 96, + 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 0, 1, 2, 3, 1, 42, 1, 9, 0, + 51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 32, 25, 2, 25, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2, + 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25, + 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6, 0, + 62, 0, 68, 0, 26, 6, 26, 6, 26, 0, ]; - #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -317,75 +389,59 @@ pub mod cased { } } +#[rustfmt::skip] pub mod grapheme_extend { use super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 33] = [ - ShortOffsetRunHeader::new(0, 768), - ShortOffsetRunHeader::new(1, 1155), - ShortOffsetRunHeader::new(3, 1425), - ShortOffsetRunHeader::new(5, 4957), - ShortOffsetRunHeader::new(249, 5906), - ShortOffsetRunHeader::new(251, 8204), - ShortOffsetRunHeader::new(347, 11503), - ShortOffsetRunHeader::new(351, 12330), - ShortOffsetRunHeader::new(357, 42607), - ShortOffsetRunHeader::new(361, 43010), - ShortOffsetRunHeader::new(369, 64286), - ShortOffsetRunHeader::new(435, 65024), - ShortOffsetRunHeader::new(437, 65438), - ShortOffsetRunHeader::new(441, 66045), - ShortOffsetRunHeader::new(443, 68097), - ShortOffsetRunHeader::new(449, 68900), - ShortOffsetRunHeader::new(461, 69291), - ShortOffsetRunHeader::new(465, 71727), - ShortOffsetRunHeader::new(601, 73459), - ShortOffsetRunHeader::new(669, 78912), - ShortOffsetRunHeader::new(679, 90398), - ShortOffsetRunHeader::new(683, 92912), - ShortOffsetRunHeader::new(687, 94031), - ShortOffsetRunHeader::new(691, 113821), - ShortOffsetRunHeader::new(699, 118528), - ShortOffsetRunHeader::new(701, 119141), - ShortOffsetRunHeader::new(705, 121344), - ShortOffsetRunHeader::new(717, 122880), - ShortOffsetRunHeader::new(729, 123566), - ShortOffsetRunHeader::new(743, 124140), - ShortOffsetRunHeader::new(747, 125136), - ShortOffsetRunHeader::new(759, 917536), + ShortOffsetRunHeader::new(0, 768), ShortOffsetRunHeader::new(1, 1155), + ShortOffsetRunHeader::new(3, 1425), ShortOffsetRunHeader::new(5, 4957), + ShortOffsetRunHeader::new(249, 5906), ShortOffsetRunHeader::new(251, 8204), + ShortOffsetRunHeader::new(347, 11503), ShortOffsetRunHeader::new(351, 12330), + ShortOffsetRunHeader::new(357, 42607), ShortOffsetRunHeader::new(361, 43010), + ShortOffsetRunHeader::new(369, 64286), ShortOffsetRunHeader::new(435, 65024), + ShortOffsetRunHeader::new(437, 65438), ShortOffsetRunHeader::new(441, 66045), + ShortOffsetRunHeader::new(443, 68097), ShortOffsetRunHeader::new(449, 68900), + ShortOffsetRunHeader::new(461, 69291), ShortOffsetRunHeader::new(465, 71727), + ShortOffsetRunHeader::new(601, 73459), ShortOffsetRunHeader::new(669, 78912), + ShortOffsetRunHeader::new(679, 90398), ShortOffsetRunHeader::new(683, 92912), + ShortOffsetRunHeader::new(687, 94031), ShortOffsetRunHeader::new(691, 113821), + ShortOffsetRunHeader::new(699, 118528), ShortOffsetRunHeader::new(701, 119141), + ShortOffsetRunHeader::new(705, 121344), ShortOffsetRunHeader::new(717, 122880), + ShortOffsetRunHeader::new(729, 123566), ShortOffsetRunHeader::new(743, 124140), + ShortOffsetRunHeader::new(747, 125136), ShortOffsetRunHeader::new(759, 917536), ShortOffsetRunHeader::new(763, 2032112), ]; static OFFSETS: [u8; 767] = [ 0, 112, 
0, 7, 0, 45, 1, 1, 1, 2, 1, 2, 1, 1, 72, 11, 48, 21, 16, 1, 101, 7, 2, 6, 2, 2, 1, - 4, 35, 1, 30, 27, 91, 11, 58, 9, 9, 1, 24, 4, 1, 9, 1, 3, 1, 5, 43, 3, 59, 9, 42, 24, 1, - 32, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 29, 1, 58, 1, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 26, - 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, - 22, 6, 1, 1, 58, 1, 1, 2, 1, 4, 8, 1, 7, 3, 10, 2, 30, 1, 59, 1, 1, 1, 12, 1, 9, 1, 40, 1, - 3, 1, 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 2, 1, 1, 3, 3, 1, 4, 7, 2, 11, - 2, 28, 2, 57, 2, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 29, 1, 72, 1, 4, 1, 2, 3, 1, 1, 8, 1, 81, - 1, 2, 7, 12, 8, 98, 1, 2, 9, 11, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, - 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 0, 3, 0, - 4, 28, 3, 29, 2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 9, 1, 45, 3, 1, 1, 117, 2, 34, 1, 118, 3, - 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 48, 46, 2, 12, - 20, 4, 48, 10, 4, 3, 38, 9, 12, 2, 32, 4, 2, 6, 56, 1, 1, 2, 3, 1, 1, 5, 56, 8, 2, 2, 152, - 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 198, 64, 0, 1, 195, 33, 0, 3, 141, 1, 96, 32, 0, 6, 105, - 2, 0, 4, 1, 10, 32, 2, 80, 2, 0, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, - 25, 11, 1, 1, 44, 3, 48, 1, 2, 4, 2, 2, 2, 1, 36, 1, 67, 6, 2, 2, 2, 2, 12, 1, 8, 1, 47, 1, - 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 42, 2, 8, 1, 238, 1, 2, 1, 4, 1, 0, 1, 0, 16, 16, 16, 0, 2, - 0, 1, 226, 1, 149, 5, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 65, 5, 0, 2, 77, 6, 70, - 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 7, 1, 61, 3, 36, 5, 1, 8, 62, - 1, 12, 2, 52, 9, 1, 1, 8, 4, 2, 1, 95, 3, 2, 4, 6, 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 1, - 1, 1, 1, 12, 1, 9, 1, 14, 7, 3, 5, 67, 1, 2, 6, 1, 1, 2, 1, 1, 3, 4, 3, 1, 1, 14, 2, 85, 8, - 2, 3, 1, 1, 23, 1, 81, 1, 2, 6, 1, 1, 2, 1, 1, 2, 1, 2, 235, 1, 2, 4, 6, 2, 1, 2, 27, 2, - 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 8, 101, 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 245, 1, 10, 4, - 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, - 3, 1, 1, 201, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, - 2, 3, 1, 1, 1, 0, 2, 11, 2, 52, 5, 5, 3, 23, 1, 0, 1, 6, 15, 0, 12, 3, 3, 0, 5, 59, 7, 0, - 1, 63, 4, 81, 1, 11, 2, 0, 2, 0, 46, 2, 23, 0, 5, 3, 6, 8, 8, 2, 7, 30, 4, 148, 3, 0, 55, - 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 100, 1, 160, 7, 0, 1, 61, - 4, 0, 4, 254, 2, 243, 1, 2, 1, 7, 2, 5, 1, 0, 7, 109, 7, 0, 96, 128, 240, 0, + 4, 35, 1, 30, 27, 91, 11, 58, 9, 9, 1, 24, 4, 1, 9, 1, 3, 1, 5, 43, 3, 59, 9, 42, 24, 1, 32, + 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 29, 1, 58, 1, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 26, 1, 2, + 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, + 1, 1, 58, 1, 1, 2, 1, 4, 8, 1, 7, 3, 10, 2, 30, 1, 59, 1, 1, 1, 12, 1, 9, 1, 40, 1, 3, 1, + 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 2, 1, 1, 3, 3, 1, 4, 7, 2, 11, 2, 28, + 2, 57, 2, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 29, 1, 72, 1, 4, 1, 2, 3, 1, 1, 8, 1, 81, 1, 2, 7, + 12, 8, 98, 1, 2, 9, 11, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, + 102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 0, 3, 0, 4, 28, 3, + 29, 2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 9, 1, 45, 3, 1, 1, 117, 2, 34, 1, 118, 3, 4, 2, 9, + 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 48, 46, 2, 12, 20, 4, 48, + 10, 4, 3, 38, 9, 12, 2, 32, 4, 
2, 6, 56, 1, 1, 2, 3, 1, 1, 5, 56, 8, 2, 2, 152, 3, 1, 13, 1, + 7, 4, 1, 6, 1, 3, 2, 198, 64, 0, 1, 195, 33, 0, 3, 141, 1, 96, 32, 0, 6, 105, 2, 0, 4, 1, + 10, 32, 2, 80, 2, 0, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 1, 1, + 44, 3, 48, 1, 2, 4, 2, 2, 2, 1, 36, 1, 67, 6, 2, 2, 2, 2, 12, 1, 8, 1, 47, 1, 51, 1, 1, 3, + 2, 2, 5, 2, 1, 1, 42, 2, 8, 1, 238, 1, 2, 1, 4, 1, 0, 1, 0, 16, 16, 16, 0, 2, 0, 1, 226, 1, + 149, 5, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 65, 5, 0, 2, 77, 6, 70, 11, 49, 4, 123, + 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 7, 1, 61, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, + 1, 1, 8, 4, 2, 1, 95, 3, 2, 4, 6, 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 1, 1, 1, 1, 12, 1, 9, + 1, 14, 7, 3, 5, 67, 1, 2, 6, 1, 1, 2, 1, 1, 3, 4, 3, 1, 1, 14, 2, 85, 8, 2, 3, 1, 1, 23, 1, + 81, 1, 2, 6, 1, 1, 2, 1, 1, 2, 1, 2, 235, 1, 2, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, + 106, 1, 1, 1, 2, 8, 101, 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 245, 1, 10, 4, 4, 1, 144, 4, 2, 2, + 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 198, 1, 1, 3, 1, 1, 201, 7, 1, 6, + 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, 1, 1, 1, 0, 2, 11, + 2, 52, 5, 5, 3, 23, 1, 0, 1, 6, 15, 0, 12, 3, 3, 0, 5, 59, 7, 0, 1, 63, 4, 81, 1, 11, 2, 0, + 2, 0, 46, 2, 23, 0, 5, 3, 6, 8, 8, 2, 7, 30, 4, 148, 3, 0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, + 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 100, 1, 160, 7, 0, 1, 61, 4, 0, 4, 254, 2, 243, 1, 2, 1, + 7, 2, 5, 1, 0, 7, 109, 7, 0, 96, 128, 240, 0, ]; - #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -408,13 +464,14 @@ pub mod grapheme_extend { } } +#[rustfmt::skip] pub mod lowercase { static BITSET_CHUNKS_MAP: [u8; 123] = [ 12, 17, 0, 0, 9, 0, 0, 13, 14, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, - 0, 3, 18, 0, 7, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, + 3, 18, 0, 7, ]; static BITSET_INDEX_CHUNKS: [[u8; 16]; 20] = [ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], @@ -498,107 +555,66 @@ pub mod lowercase { 0b1110101111000000000000000000000000001111111111111111111111111100, ]; static BITSET_MAPPING: [(u8, u8); 22] = [ - (0, 64), - (1, 184), - (1, 182), - (1, 179), - (1, 172), - (1, 168), - (1, 161), - (1, 146), - (1, 144), - (1, 140), - (1, 136), - (1, 132), - (2, 146), - (2, 144), - (2, 83), - (3, 93), - (3, 147), - (3, 133), - (4, 12), - (4, 6), - (5, 187), - (6, 78), + (0, 64), (1, 184), (1, 182), (1, 179), (1, 172), (1, 168), (1, 161), (1, 146), (1, 144), + (1, 140), (1, 136), (1, 132), (2, 146), (2, 144), (2, 83), (3, 93), (3, 147), (3, 133), + (4, 12), (4, 6), (5, 187), (6, 78), ]; pub const fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); - (c as u32) >= 0xaa - && super::bitset_search( - c as u32, - &BITSET_CHUNKS_MAP, - &BITSET_INDEX_CHUNKS, - &BITSET_CANONICAL, - &BITSET_MAPPING, - ) + (c as u32) >= 0xaa && + super::bitset_search( + c as u32, + &BITSET_CHUNKS_MAP, + &BITSET_INDEX_CHUNKS, + &BITSET_CANONICAL, + &BITSET_MAPPING, + ) } } +#[rustfmt::skip] pub mod n { use 
super::ShortOffsetRunHeader; static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 43] = [ - ShortOffsetRunHeader::new(0, 1632), - ShortOffsetRunHeader::new(7, 2406), - ShortOffsetRunHeader::new(13, 4160), - ShortOffsetRunHeader::new(47, 4969), - ShortOffsetRunHeader::new(51, 5870), - ShortOffsetRunHeader::new(53, 6470), - ShortOffsetRunHeader::new(61, 8304), - ShortOffsetRunHeader::new(77, 9312), - ShortOffsetRunHeader::new(87, 10102), - ShortOffsetRunHeader::new(91, 11517), - ShortOffsetRunHeader::new(93, 12295), - ShortOffsetRunHeader::new(95, 12690), - ShortOffsetRunHeader::new(101, 42528), - ShortOffsetRunHeader::new(113, 43056), - ShortOffsetRunHeader::new(117, 44016), - ShortOffsetRunHeader::new(129, 65296), - ShortOffsetRunHeader::new(131, 65799), - ShortOffsetRunHeader::new(133, 66273), - ShortOffsetRunHeader::new(139, 67672), - ShortOffsetRunHeader::new(151, 68858), - ShortOffsetRunHeader::new(181, 69216), - ShortOffsetRunHeader::new(187, 70736), - ShortOffsetRunHeader::new(207, 71248), - ShortOffsetRunHeader::new(211, 71904), - ShortOffsetRunHeader::new(219, 72688), - ShortOffsetRunHeader::new(223, 73552), - ShortOffsetRunHeader::new(233, 74752), - ShortOffsetRunHeader::new(237, 90416), - ShortOffsetRunHeader::new(239, 92768), - ShortOffsetRunHeader::new(241, 93552), - ShortOffsetRunHeader::new(249, 93824), - ShortOffsetRunHeader::new(251, 94196), - ShortOffsetRunHeader::new(253, 118000), - ShortOffsetRunHeader::new(255, 119488), - ShortOffsetRunHeader::new(257, 120782), - ShortOffsetRunHeader::new(263, 123200), - ShortOffsetRunHeader::new(265, 123632), - ShortOffsetRunHeader::new(267, 124144), - ShortOffsetRunHeader::new(269, 125127), - ShortOffsetRunHeader::new(273, 126065), - ShortOffsetRunHeader::new(277, 127232), - ShortOffsetRunHeader::new(287, 130032), + ShortOffsetRunHeader::new(0, 1632), ShortOffsetRunHeader::new(7, 2406), + ShortOffsetRunHeader::new(13, 4160), ShortOffsetRunHeader::new(47, 4969), + ShortOffsetRunHeader::new(51, 5870), ShortOffsetRunHeader::new(53, 6470), + ShortOffsetRunHeader::new(61, 8304), ShortOffsetRunHeader::new(77, 9312), + ShortOffsetRunHeader::new(87, 10102), ShortOffsetRunHeader::new(91, 11517), + ShortOffsetRunHeader::new(93, 12295), ShortOffsetRunHeader::new(95, 12690), + ShortOffsetRunHeader::new(101, 42528), ShortOffsetRunHeader::new(113, 43056), + ShortOffsetRunHeader::new(117, 44016), ShortOffsetRunHeader::new(129, 65296), + ShortOffsetRunHeader::new(131, 65799), ShortOffsetRunHeader::new(133, 66273), + ShortOffsetRunHeader::new(139, 67672), ShortOffsetRunHeader::new(151, 68858), + ShortOffsetRunHeader::new(181, 69216), ShortOffsetRunHeader::new(187, 70736), + ShortOffsetRunHeader::new(207, 71248), ShortOffsetRunHeader::new(211, 71904), + ShortOffsetRunHeader::new(219, 72688), ShortOffsetRunHeader::new(223, 73552), + ShortOffsetRunHeader::new(233, 74752), ShortOffsetRunHeader::new(237, 90416), + ShortOffsetRunHeader::new(239, 92768), ShortOffsetRunHeader::new(241, 93552), + ShortOffsetRunHeader::new(249, 93824), ShortOffsetRunHeader::new(251, 94196), + ShortOffsetRunHeader::new(253, 118000), ShortOffsetRunHeader::new(255, 119488), + ShortOffsetRunHeader::new(257, 120782), ShortOffsetRunHeader::new(263, 123200), + ShortOffsetRunHeader::new(265, 123632), ShortOffsetRunHeader::new(267, 124144), + ShortOffsetRunHeader::new(269, 125127), ShortOffsetRunHeader::new(273, 126065), + ShortOffsetRunHeader::new(277, 127232), ShortOffsetRunHeader::new(287, 130032), ShortOffsetRunHeader::new(289, 1244154), ]; static OFFSETS: [u8; 291] = [ 178, 2, 5, 1, 2, 
3, 0, 10, 134, 10, 198, 10, 0, 10, 118, 10, 4, 6, 108, 10, 118, 10, 118, 10, 2, 6, 110, 13, 115, 10, 8, 7, 103, 10, 104, 7, 7, 19, 109, 10, 96, 10, 118, 10, 70, 20, - 0, 10, 70, 10, 0, 20, 0, 3, 239, 10, 6, 10, 22, 10, 0, 10, 128, 11, 165, 10, 6, 10, 182, - 10, 86, 10, 134, 10, 6, 10, 0, 1, 3, 6, 6, 10, 198, 51, 2, 5, 0, 60, 78, 22, 0, 30, 0, 1, - 0, 1, 25, 9, 14, 3, 0, 4, 138, 10, 30, 8, 1, 15, 32, 10, 39, 15, 0, 10, 188, 10, 0, 6, 154, - 10, 38, 10, 198, 10, 22, 10, 86, 10, 0, 10, 0, 10, 0, 45, 12, 57, 17, 2, 0, 27, 36, 4, 29, - 1, 8, 1, 134, 5, 202, 10, 0, 8, 25, 7, 39, 9, 75, 5, 22, 6, 160, 2, 2, 16, 2, 46, 64, 9, - 52, 2, 30, 3, 75, 5, 104, 8, 24, 8, 41, 7, 0, 6, 48, 10, 6, 10, 0, 31, 158, 10, 42, 4, 112, - 7, 134, 30, 128, 10, 60, 10, 144, 10, 7, 20, 251, 10, 0, 10, 118, 10, 0, 10, 102, 10, 6, - 20, 76, 12, 0, 19, 93, 10, 0, 10, 86, 29, 227, 10, 70, 10, 54, 10, 0, 10, 102, 21, 0, 111, - 0, 10, 0, 10, 86, 10, 134, 10, 1, 7, 0, 10, 0, 23, 0, 3, 0, 10, 0, 20, 12, 20, 108, 25, 0, - 50, 0, 10, 0, 10, 0, 10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, - 0, 10, 0, + 0, 10, 70, 10, 0, 20, 0, 3, 239, 10, 6, 10, 22, 10, 0, 10, 128, 11, 165, 10, 6, 10, 182, 10, + 86, 10, 134, 10, 6, 10, 0, 1, 3, 6, 6, 10, 198, 51, 2, 5, 0, 60, 78, 22, 0, 30, 0, 1, 0, 1, + 25, 9, 14, 3, 0, 4, 138, 10, 30, 8, 1, 15, 32, 10, 39, 15, 0, 10, 188, 10, 0, 6, 154, 10, + 38, 10, 198, 10, 22, 10, 86, 10, 0, 10, 0, 10, 0, 45, 12, 57, 17, 2, 0, 27, 36, 4, 29, 1, 8, + 1, 134, 5, 202, 10, 0, 8, 25, 7, 39, 9, 75, 5, 22, 6, 160, 2, 2, 16, 2, 46, 64, 9, 52, 2, + 30, 3, 75, 5, 104, 8, 24, 8, 41, 7, 0, 6, 48, 10, 6, 10, 0, 31, 158, 10, 42, 4, 112, 7, 134, + 30, 128, 10, 60, 10, 144, 10, 7, 20, 251, 10, 0, 10, 118, 10, 0, 10, 102, 10, 6, 20, 76, 12, + 0, 19, 93, 10, 0, 10, 86, 29, 227, 10, 70, 10, 54, 10, 0, 10, 102, 21, 0, 111, 0, 10, 0, 10, + 86, 10, 134, 10, 1, 7, 0, 10, 0, 23, 0, 3, 0, 10, 0, 20, 12, 20, 108, 25, 0, 50, 0, 10, 0, + 10, 0, 10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0, ]; - #[inline] pub fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); @@ -621,13 +637,14 @@ pub mod n { } } +#[rustfmt::skip] pub mod uppercase { static BITSET_CHUNKS_MAP: [u8; 125] = [ 3, 14, 6, 6, 0, 6, 6, 2, 5, 12, 6, 15, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 6, 13, 6, 11, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 16, 6, - 6, 6, 6, 10, 6, 4, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 7, 6, 13, 6, 11, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 16, 6, 6, + 6, 6, 10, 6, 4, ]; static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [ [44, 44, 5, 35, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 5, 0], @@ -695,1001 +712,877 @@ pub mod uppercase { 0b1111111100000000111111110000000000111111000000001111111100000000, ]; static BITSET_MAPPING: [(u8, u8); 25] = [ - (0, 182), - (0, 74), - (0, 166), - (0, 162), - (0, 159), - (0, 150), - (0, 148), - (0, 142), - (0, 134), - (0, 131), - (0, 64), - (1, 66), - (1, 70), - (1, 83), - (1, 12), - (1, 8), - (2, 146), - (2, 140), - (2, 134), - (2, 130), - (3, 164), - (3, 146), - (3, 20), - (4, 178), - (4, 171), + (0, 182), (0, 74), (0, 166), (0, 162), (0, 159), (0, 150), (0, 148), 
(0, 142), (0, 134), + (0, 131), (0, 64), (1, 66), (1, 70), (1, 83), (1, 12), (1, 8), (2, 146), (2, 140), (2, 134), + (2, 130), (3, 164), (3, 146), (3, 20), (4, 178), (4, 171), ]; pub const fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); - (c as u32) >= 0xc0 - && super::bitset_search( - c as u32, - &BITSET_CHUNKS_MAP, - &BITSET_INDEX_CHUNKS, - &BITSET_CANONICAL, - &BITSET_MAPPING, - ) + (c as u32) >= 0xc0 && + super::bitset_search( + c as u32, + &BITSET_CHUNKS_MAP, + &BITSET_INDEX_CHUNKS, + &BITSET_CANONICAL, + &BITSET_MAPPING, + ) } } +#[rustfmt::skip] pub mod white_space { static WHITESPACE_MAP: [u8; 256] = [ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; - #[inline] pub const fn lookup(c: char) -> bool { debug_assert!(!c.is_ascii()); match c as u32 >> 8 { - 0x00 => WHITESPACE_MAP[c as usize & 0xff] & 1 != 0, - 0x16 => c as u32 == 0x1680, - 0x20 => WHITESPACE_MAP[c as usize & 0xff] & 2 != 0, - 0x30 => c as u32 == 0x3000, + 0 => WHITESPACE_MAP[c as usize & 0xff] & 1 != 0, + 22 => c as u32 == 0x1680, + 32 => WHITESPACE_MAP[c as usize & 0xff] & 2 != 0, + 48 => c as u32 == 0x3000, _ => false, } } } +#[rustfmt::skip] pub mod conversions { - #[rustfmt::skip] - static LOWERCASE_TABLE: &[(char, u32); 1462] = &[ - ('\u{c0}', 0xe0), ('\u{c1}', 0xe1), ('\u{c2}', 0xe2), ('\u{c3}', 0xe3), ('\u{c4}', 0xe4), - ('\u{c5}', 0xe5), ('\u{c6}', 0xe6), ('\u{c7}', 0xe7), ('\u{c8}', 0xe8), ('\u{c9}', 0xe9), - ('\u{ca}', 0xea), ('\u{cb}', 0xeb), ('\u{cc}', 0xec), ('\u{cd}', 0xed), ('\u{ce}', 0xee), - ('\u{cf}', 0xef), ('\u{d0}', 0xf0), ('\u{d1}', 0xf1), ('\u{d2}', 0xf2), ('\u{d3}', 0xf3), - ('\u{d4}', 0xf4), ('\u{d5}', 0xf5), ('\u{d6}', 0xf6), ('\u{d8}', 0xf8), ('\u{d9}', 0xf9), - ('\u{da}', 0xfa), ('\u{db}', 0xfb), ('\u{dc}', 0xfc), ('\u{dd}', 0xfd), ('\u{de}', 0xfe), - ('\u{100}', 0x101), ('\u{102}', 0x103), ('\u{104}', 0x105), ('\u{106}', 0x107), - ('\u{108}', 0x109), ('\u{10a}', 0x10b), ('\u{10c}', 0x10d), ('\u{10e}', 0x10f), - ('\u{110}', 0x111), ('\u{112}', 0x113), ('\u{114}', 0x115), ('\u{116}', 0x117), - ('\u{118}', 0x119), ('\u{11a}', 0x11b), ('\u{11c}', 0x11d), ('\u{11e}', 0x11f), - 
('\u{120}', 0x121), ('\u{122}', 0x123), ('\u{124}', 0x125), ('\u{126}', 0x127), - ('\u{128}', 0x129), ('\u{12a}', 0x12b), ('\u{12c}', 0x12d), ('\u{12e}', 0x12f), - ('\u{130}', 0x400000), ('\u{132}', 0x133), ('\u{134}', 0x135), ('\u{136}', 0x137), - ('\u{139}', 0x13a), ('\u{13b}', 0x13c), ('\u{13d}', 0x13e), ('\u{13f}', 0x140), - ('\u{141}', 0x142), ('\u{143}', 0x144), ('\u{145}', 0x146), ('\u{147}', 0x148), - ('\u{14a}', 0x14b), ('\u{14c}', 0x14d), ('\u{14e}', 0x14f), ('\u{150}', 0x151), - ('\u{152}', 0x153), ('\u{154}', 0x155), ('\u{156}', 0x157), ('\u{158}', 0x159), - ('\u{15a}', 0x15b), ('\u{15c}', 0x15d), ('\u{15e}', 0x15f), ('\u{160}', 0x161), - ('\u{162}', 0x163), ('\u{164}', 0x165), ('\u{166}', 0x167), ('\u{168}', 0x169), - ('\u{16a}', 0x16b), ('\u{16c}', 0x16d), ('\u{16e}', 0x16f), ('\u{170}', 0x171), - ('\u{172}', 0x173), ('\u{174}', 0x175), ('\u{176}', 0x177), ('\u{178}', 0xff), - ('\u{179}', 0x17a), ('\u{17b}', 0x17c), ('\u{17d}', 0x17e), ('\u{181}', 0x253), - ('\u{182}', 0x183), ('\u{184}', 0x185), ('\u{186}', 0x254), ('\u{187}', 0x188), - ('\u{189}', 0x256), ('\u{18a}', 0x257), ('\u{18b}', 0x18c), ('\u{18e}', 0x1dd), - ('\u{18f}', 0x259), ('\u{190}', 0x25b), ('\u{191}', 0x192), ('\u{193}', 0x260), - ('\u{194}', 0x263), ('\u{196}', 0x269), ('\u{197}', 0x268), ('\u{198}', 0x199), - ('\u{19c}', 0x26f), ('\u{19d}', 0x272), ('\u{19f}', 0x275), ('\u{1a0}', 0x1a1), - ('\u{1a2}', 0x1a3), ('\u{1a4}', 0x1a5), ('\u{1a6}', 0x280), ('\u{1a7}', 0x1a8), - ('\u{1a9}', 0x283), ('\u{1ac}', 0x1ad), ('\u{1ae}', 0x288), ('\u{1af}', 0x1b0), - ('\u{1b1}', 0x28a), ('\u{1b2}', 0x28b), ('\u{1b3}', 0x1b4), ('\u{1b5}', 0x1b6), - ('\u{1b7}', 0x292), ('\u{1b8}', 0x1b9), ('\u{1bc}', 0x1bd), ('\u{1c4}', 0x1c6), - ('\u{1c5}', 0x1c6), ('\u{1c7}', 0x1c9), ('\u{1c8}', 0x1c9), ('\u{1ca}', 0x1cc), - ('\u{1cb}', 0x1cc), ('\u{1cd}', 0x1ce), ('\u{1cf}', 0x1d0), ('\u{1d1}', 0x1d2), - ('\u{1d3}', 0x1d4), ('\u{1d5}', 0x1d6), ('\u{1d7}', 0x1d8), ('\u{1d9}', 0x1da), - ('\u{1db}', 0x1dc), ('\u{1de}', 0x1df), ('\u{1e0}', 0x1e1), ('\u{1e2}', 0x1e3), - ('\u{1e4}', 0x1e5), ('\u{1e6}', 0x1e7), ('\u{1e8}', 0x1e9), ('\u{1ea}', 0x1eb), - ('\u{1ec}', 0x1ed), ('\u{1ee}', 0x1ef), ('\u{1f1}', 0x1f3), ('\u{1f2}', 0x1f3), - ('\u{1f4}', 0x1f5), ('\u{1f6}', 0x195), ('\u{1f7}', 0x1bf), ('\u{1f8}', 0x1f9), - ('\u{1fa}', 0x1fb), ('\u{1fc}', 0x1fd), ('\u{1fe}', 0x1ff), ('\u{200}', 0x201), - ('\u{202}', 0x203), ('\u{204}', 0x205), ('\u{206}', 0x207), ('\u{208}', 0x209), - ('\u{20a}', 0x20b), ('\u{20c}', 0x20d), ('\u{20e}', 0x20f), ('\u{210}', 0x211), - ('\u{212}', 0x213), ('\u{214}', 0x215), ('\u{216}', 0x217), ('\u{218}', 0x219), - ('\u{21a}', 0x21b), ('\u{21c}', 0x21d), ('\u{21e}', 0x21f), ('\u{220}', 0x19e), - ('\u{222}', 0x223), ('\u{224}', 0x225), ('\u{226}', 0x227), ('\u{228}', 0x229), - ('\u{22a}', 0x22b), ('\u{22c}', 0x22d), ('\u{22e}', 0x22f), ('\u{230}', 0x231), - ('\u{232}', 0x233), ('\u{23a}', 0x2c65), ('\u{23b}', 0x23c), ('\u{23d}', 0x19a), - ('\u{23e}', 0x2c66), ('\u{241}', 0x242), ('\u{243}', 0x180), ('\u{244}', 0x289), - ('\u{245}', 0x28c), ('\u{246}', 0x247), ('\u{248}', 0x249), ('\u{24a}', 0x24b), - ('\u{24c}', 0x24d), ('\u{24e}', 0x24f), ('\u{370}', 0x371), ('\u{372}', 0x373), - ('\u{376}', 0x377), ('\u{37f}', 0x3f3), ('\u{386}', 0x3ac), ('\u{388}', 0x3ad), - ('\u{389}', 0x3ae), ('\u{38a}', 0x3af), ('\u{38c}', 0x3cc), ('\u{38e}', 0x3cd), - ('\u{38f}', 0x3ce), ('\u{391}', 0x3b1), ('\u{392}', 0x3b2), ('\u{393}', 0x3b3), - ('\u{394}', 0x3b4), ('\u{395}', 0x3b5), ('\u{396}', 0x3b6), ('\u{397}', 0x3b7), - ('\u{398}', 0x3b8), 
('\u{399}', 0x3b9), ('\u{39a}', 0x3ba), ('\u{39b}', 0x3bb), - ('\u{39c}', 0x3bc), ('\u{39d}', 0x3bd), ('\u{39e}', 0x3be), ('\u{39f}', 0x3bf), - ('\u{3a0}', 0x3c0), ('\u{3a1}', 0x3c1), ('\u{3a3}', 0x3c3), ('\u{3a4}', 0x3c4), - ('\u{3a5}', 0x3c5), ('\u{3a6}', 0x3c6), ('\u{3a7}', 0x3c7), ('\u{3a8}', 0x3c8), - ('\u{3a9}', 0x3c9), ('\u{3aa}', 0x3ca), ('\u{3ab}', 0x3cb), ('\u{3cf}', 0x3d7), - ('\u{3d8}', 0x3d9), ('\u{3da}', 0x3db), ('\u{3dc}', 0x3dd), ('\u{3de}', 0x3df), - ('\u{3e0}', 0x3e1), ('\u{3e2}', 0x3e3), ('\u{3e4}', 0x3e5), ('\u{3e6}', 0x3e7), - ('\u{3e8}', 0x3e9), ('\u{3ea}', 0x3eb), ('\u{3ec}', 0x3ed), ('\u{3ee}', 0x3ef), - ('\u{3f4}', 0x3b8), ('\u{3f7}', 0x3f8), ('\u{3f9}', 0x3f2), ('\u{3fa}', 0x3fb), - ('\u{3fd}', 0x37b), ('\u{3fe}', 0x37c), ('\u{3ff}', 0x37d), ('\u{400}', 0x450), - ('\u{401}', 0x451), ('\u{402}', 0x452), ('\u{403}', 0x453), ('\u{404}', 0x454), - ('\u{405}', 0x455), ('\u{406}', 0x456), ('\u{407}', 0x457), ('\u{408}', 0x458), - ('\u{409}', 0x459), ('\u{40a}', 0x45a), ('\u{40b}', 0x45b), ('\u{40c}', 0x45c), - ('\u{40d}', 0x45d), ('\u{40e}', 0x45e), ('\u{40f}', 0x45f), ('\u{410}', 0x430), - ('\u{411}', 0x431), ('\u{412}', 0x432), ('\u{413}', 0x433), ('\u{414}', 0x434), - ('\u{415}', 0x435), ('\u{416}', 0x436), ('\u{417}', 0x437), ('\u{418}', 0x438), - ('\u{419}', 0x439), ('\u{41a}', 0x43a), ('\u{41b}', 0x43b), ('\u{41c}', 0x43c), - ('\u{41d}', 0x43d), ('\u{41e}', 0x43e), ('\u{41f}', 0x43f), ('\u{420}', 0x440), - ('\u{421}', 0x441), ('\u{422}', 0x442), ('\u{423}', 0x443), ('\u{424}', 0x444), - ('\u{425}', 0x445), ('\u{426}', 0x446), ('\u{427}', 0x447), ('\u{428}', 0x448), - ('\u{429}', 0x449), ('\u{42a}', 0x44a), ('\u{42b}', 0x44b), ('\u{42c}', 0x44c), - ('\u{42d}', 0x44d), ('\u{42e}', 0x44e), ('\u{42f}', 0x44f), ('\u{460}', 0x461), - ('\u{462}', 0x463), ('\u{464}', 0x465), ('\u{466}', 0x467), ('\u{468}', 0x469), - ('\u{46a}', 0x46b), ('\u{46c}', 0x46d), ('\u{46e}', 0x46f), ('\u{470}', 0x471), - ('\u{472}', 0x473), ('\u{474}', 0x475), ('\u{476}', 0x477), ('\u{478}', 0x479), - ('\u{47a}', 0x47b), ('\u{47c}', 0x47d), ('\u{47e}', 0x47f), ('\u{480}', 0x481), - ('\u{48a}', 0x48b), ('\u{48c}', 0x48d), ('\u{48e}', 0x48f), ('\u{490}', 0x491), - ('\u{492}', 0x493), ('\u{494}', 0x495), ('\u{496}', 0x497), ('\u{498}', 0x499), - ('\u{49a}', 0x49b), ('\u{49c}', 0x49d), ('\u{49e}', 0x49f), ('\u{4a0}', 0x4a1), - ('\u{4a2}', 0x4a3), ('\u{4a4}', 0x4a5), ('\u{4a6}', 0x4a7), ('\u{4a8}', 0x4a9), - ('\u{4aa}', 0x4ab), ('\u{4ac}', 0x4ad), ('\u{4ae}', 0x4af), ('\u{4b0}', 0x4b1), - ('\u{4b2}', 0x4b3), ('\u{4b4}', 0x4b5), ('\u{4b6}', 0x4b7), ('\u{4b8}', 0x4b9), - ('\u{4ba}', 0x4bb), ('\u{4bc}', 0x4bd), ('\u{4be}', 0x4bf), ('\u{4c0}', 0x4cf), - ('\u{4c1}', 0x4c2), ('\u{4c3}', 0x4c4), ('\u{4c5}', 0x4c6), ('\u{4c7}', 0x4c8), - ('\u{4c9}', 0x4ca), ('\u{4cb}', 0x4cc), ('\u{4cd}', 0x4ce), ('\u{4d0}', 0x4d1), - ('\u{4d2}', 0x4d3), ('\u{4d4}', 0x4d5), ('\u{4d6}', 0x4d7), ('\u{4d8}', 0x4d9), - ('\u{4da}', 0x4db), ('\u{4dc}', 0x4dd), ('\u{4de}', 0x4df), ('\u{4e0}', 0x4e1), - ('\u{4e2}', 0x4e3), ('\u{4e4}', 0x4e5), ('\u{4e6}', 0x4e7), ('\u{4e8}', 0x4e9), - ('\u{4ea}', 0x4eb), ('\u{4ec}', 0x4ed), ('\u{4ee}', 0x4ef), ('\u{4f0}', 0x4f1), - ('\u{4f2}', 0x4f3), ('\u{4f4}', 0x4f5), ('\u{4f6}', 0x4f7), ('\u{4f8}', 0x4f9), - ('\u{4fa}', 0x4fb), ('\u{4fc}', 0x4fd), ('\u{4fe}', 0x4ff), ('\u{500}', 0x501), - ('\u{502}', 0x503), ('\u{504}', 0x505), ('\u{506}', 0x507), ('\u{508}', 0x509), - ('\u{50a}', 0x50b), ('\u{50c}', 0x50d), ('\u{50e}', 0x50f), ('\u{510}', 0x511), - ('\u{512}', 0x513), ('\u{514}', 0x515), 
('\u{516}', 0x517), ('\u{518}', 0x519), - ('\u{51a}', 0x51b), ('\u{51c}', 0x51d), ('\u{51e}', 0x51f), ('\u{520}', 0x521), - ('\u{522}', 0x523), ('\u{524}', 0x525), ('\u{526}', 0x527), ('\u{528}', 0x529), - ('\u{52a}', 0x52b), ('\u{52c}', 0x52d), ('\u{52e}', 0x52f), ('\u{531}', 0x561), - ('\u{532}', 0x562), ('\u{533}', 0x563), ('\u{534}', 0x564), ('\u{535}', 0x565), - ('\u{536}', 0x566), ('\u{537}', 0x567), ('\u{538}', 0x568), ('\u{539}', 0x569), - ('\u{53a}', 0x56a), ('\u{53b}', 0x56b), ('\u{53c}', 0x56c), ('\u{53d}', 0x56d), - ('\u{53e}', 0x56e), ('\u{53f}', 0x56f), ('\u{540}', 0x570), ('\u{541}', 0x571), - ('\u{542}', 0x572), ('\u{543}', 0x573), ('\u{544}', 0x574), ('\u{545}', 0x575), - ('\u{546}', 0x576), ('\u{547}', 0x577), ('\u{548}', 0x578), ('\u{549}', 0x579), - ('\u{54a}', 0x57a), ('\u{54b}', 0x57b), ('\u{54c}', 0x57c), ('\u{54d}', 0x57d), - ('\u{54e}', 0x57e), ('\u{54f}', 0x57f), ('\u{550}', 0x580), ('\u{551}', 0x581), - ('\u{552}', 0x582), ('\u{553}', 0x583), ('\u{554}', 0x584), ('\u{555}', 0x585), - ('\u{556}', 0x586), ('\u{10a0}', 0x2d00), ('\u{10a1}', 0x2d01), ('\u{10a2}', 0x2d02), - ('\u{10a3}', 0x2d03), ('\u{10a4}', 0x2d04), ('\u{10a5}', 0x2d05), ('\u{10a6}', 0x2d06), - ('\u{10a7}', 0x2d07), ('\u{10a8}', 0x2d08), ('\u{10a9}', 0x2d09), ('\u{10aa}', 0x2d0a), - ('\u{10ab}', 0x2d0b), ('\u{10ac}', 0x2d0c), ('\u{10ad}', 0x2d0d), ('\u{10ae}', 0x2d0e), - ('\u{10af}', 0x2d0f), ('\u{10b0}', 0x2d10), ('\u{10b1}', 0x2d11), ('\u{10b2}', 0x2d12), - ('\u{10b3}', 0x2d13), ('\u{10b4}', 0x2d14), ('\u{10b5}', 0x2d15), ('\u{10b6}', 0x2d16), - ('\u{10b7}', 0x2d17), ('\u{10b8}', 0x2d18), ('\u{10b9}', 0x2d19), ('\u{10ba}', 0x2d1a), - ('\u{10bb}', 0x2d1b), ('\u{10bc}', 0x2d1c), ('\u{10bd}', 0x2d1d), ('\u{10be}', 0x2d1e), - ('\u{10bf}', 0x2d1f), ('\u{10c0}', 0x2d20), ('\u{10c1}', 0x2d21), ('\u{10c2}', 0x2d22), - ('\u{10c3}', 0x2d23), ('\u{10c4}', 0x2d24), ('\u{10c5}', 0x2d25), ('\u{10c7}', 0x2d27), - ('\u{10cd}', 0x2d2d), ('\u{13a0}', 0xab70), ('\u{13a1}', 0xab71), ('\u{13a2}', 0xab72), - ('\u{13a3}', 0xab73), ('\u{13a4}', 0xab74), ('\u{13a5}', 0xab75), ('\u{13a6}', 0xab76), - ('\u{13a7}', 0xab77), ('\u{13a8}', 0xab78), ('\u{13a9}', 0xab79), ('\u{13aa}', 0xab7a), - ('\u{13ab}', 0xab7b), ('\u{13ac}', 0xab7c), ('\u{13ad}', 0xab7d), ('\u{13ae}', 0xab7e), - ('\u{13af}', 0xab7f), ('\u{13b0}', 0xab80), ('\u{13b1}', 0xab81), ('\u{13b2}', 0xab82), - ('\u{13b3}', 0xab83), ('\u{13b4}', 0xab84), ('\u{13b5}', 0xab85), ('\u{13b6}', 0xab86), - ('\u{13b7}', 0xab87), ('\u{13b8}', 0xab88), ('\u{13b9}', 0xab89), ('\u{13ba}', 0xab8a), - ('\u{13bb}', 0xab8b), ('\u{13bc}', 0xab8c), ('\u{13bd}', 0xab8d), ('\u{13be}', 0xab8e), - ('\u{13bf}', 0xab8f), ('\u{13c0}', 0xab90), ('\u{13c1}', 0xab91), ('\u{13c2}', 0xab92), - ('\u{13c3}', 0xab93), ('\u{13c4}', 0xab94), ('\u{13c5}', 0xab95), ('\u{13c6}', 0xab96), - ('\u{13c7}', 0xab97), ('\u{13c8}', 0xab98), ('\u{13c9}', 0xab99), ('\u{13ca}', 0xab9a), - ('\u{13cb}', 0xab9b), ('\u{13cc}', 0xab9c), ('\u{13cd}', 0xab9d), ('\u{13ce}', 0xab9e), - ('\u{13cf}', 0xab9f), ('\u{13d0}', 0xaba0), ('\u{13d1}', 0xaba1), ('\u{13d2}', 0xaba2), - ('\u{13d3}', 0xaba3), ('\u{13d4}', 0xaba4), ('\u{13d5}', 0xaba5), ('\u{13d6}', 0xaba6), - ('\u{13d7}', 0xaba7), ('\u{13d8}', 0xaba8), ('\u{13d9}', 0xaba9), ('\u{13da}', 0xabaa), - ('\u{13db}', 0xabab), ('\u{13dc}', 0xabac), ('\u{13dd}', 0xabad), ('\u{13de}', 0xabae), - ('\u{13df}', 0xabaf), ('\u{13e0}', 0xabb0), ('\u{13e1}', 0xabb1), ('\u{13e2}', 0xabb2), - ('\u{13e3}', 0xabb3), ('\u{13e4}', 0xabb4), ('\u{13e5}', 0xabb5), ('\u{13e6}', 0xabb6), - 
('\u{13e7}', 0xabb7), ('\u{13e8}', 0xabb8), ('\u{13e9}', 0xabb9), ('\u{13ea}', 0xabba), - ('\u{13eb}', 0xabbb), ('\u{13ec}', 0xabbc), ('\u{13ed}', 0xabbd), ('\u{13ee}', 0xabbe), - ('\u{13ef}', 0xabbf), ('\u{13f0}', 0x13f8), ('\u{13f1}', 0x13f9), ('\u{13f2}', 0x13fa), - ('\u{13f3}', 0x13fb), ('\u{13f4}', 0x13fc), ('\u{13f5}', 0x13fd), ('\u{1c89}', 0x1c8a), - ('\u{1c90}', 0x10d0), ('\u{1c91}', 0x10d1), ('\u{1c92}', 0x10d2), ('\u{1c93}', 0x10d3), - ('\u{1c94}', 0x10d4), ('\u{1c95}', 0x10d5), ('\u{1c96}', 0x10d6), ('\u{1c97}', 0x10d7), - ('\u{1c98}', 0x10d8), ('\u{1c99}', 0x10d9), ('\u{1c9a}', 0x10da), ('\u{1c9b}', 0x10db), - ('\u{1c9c}', 0x10dc), ('\u{1c9d}', 0x10dd), ('\u{1c9e}', 0x10de), ('\u{1c9f}', 0x10df), - ('\u{1ca0}', 0x10e0), ('\u{1ca1}', 0x10e1), ('\u{1ca2}', 0x10e2), ('\u{1ca3}', 0x10e3), - ('\u{1ca4}', 0x10e4), ('\u{1ca5}', 0x10e5), ('\u{1ca6}', 0x10e6), ('\u{1ca7}', 0x10e7), - ('\u{1ca8}', 0x10e8), ('\u{1ca9}', 0x10e9), ('\u{1caa}', 0x10ea), ('\u{1cab}', 0x10eb), - ('\u{1cac}', 0x10ec), ('\u{1cad}', 0x10ed), ('\u{1cae}', 0x10ee), ('\u{1caf}', 0x10ef), - ('\u{1cb0}', 0x10f0), ('\u{1cb1}', 0x10f1), ('\u{1cb2}', 0x10f2), ('\u{1cb3}', 0x10f3), - ('\u{1cb4}', 0x10f4), ('\u{1cb5}', 0x10f5), ('\u{1cb6}', 0x10f6), ('\u{1cb7}', 0x10f7), - ('\u{1cb8}', 0x10f8), ('\u{1cb9}', 0x10f9), ('\u{1cba}', 0x10fa), ('\u{1cbd}', 0x10fd), - ('\u{1cbe}', 0x10fe), ('\u{1cbf}', 0x10ff), ('\u{1e00}', 0x1e01), ('\u{1e02}', 0x1e03), - ('\u{1e04}', 0x1e05), ('\u{1e06}', 0x1e07), ('\u{1e08}', 0x1e09), ('\u{1e0a}', 0x1e0b), - ('\u{1e0c}', 0x1e0d), ('\u{1e0e}', 0x1e0f), ('\u{1e10}', 0x1e11), ('\u{1e12}', 0x1e13), - ('\u{1e14}', 0x1e15), ('\u{1e16}', 0x1e17), ('\u{1e18}', 0x1e19), ('\u{1e1a}', 0x1e1b), - ('\u{1e1c}', 0x1e1d), ('\u{1e1e}', 0x1e1f), ('\u{1e20}', 0x1e21), ('\u{1e22}', 0x1e23), - ('\u{1e24}', 0x1e25), ('\u{1e26}', 0x1e27), ('\u{1e28}', 0x1e29), ('\u{1e2a}', 0x1e2b), - ('\u{1e2c}', 0x1e2d), ('\u{1e2e}', 0x1e2f), ('\u{1e30}', 0x1e31), ('\u{1e32}', 0x1e33), - ('\u{1e34}', 0x1e35), ('\u{1e36}', 0x1e37), ('\u{1e38}', 0x1e39), ('\u{1e3a}', 0x1e3b), - ('\u{1e3c}', 0x1e3d), ('\u{1e3e}', 0x1e3f), ('\u{1e40}', 0x1e41), ('\u{1e42}', 0x1e43), - ('\u{1e44}', 0x1e45), ('\u{1e46}', 0x1e47), ('\u{1e48}', 0x1e49), ('\u{1e4a}', 0x1e4b), - ('\u{1e4c}', 0x1e4d), ('\u{1e4e}', 0x1e4f), ('\u{1e50}', 0x1e51), ('\u{1e52}', 0x1e53), - ('\u{1e54}', 0x1e55), ('\u{1e56}', 0x1e57), ('\u{1e58}', 0x1e59), ('\u{1e5a}', 0x1e5b), - ('\u{1e5c}', 0x1e5d), ('\u{1e5e}', 0x1e5f), ('\u{1e60}', 0x1e61), ('\u{1e62}', 0x1e63), - ('\u{1e64}', 0x1e65), ('\u{1e66}', 0x1e67), ('\u{1e68}', 0x1e69), ('\u{1e6a}', 0x1e6b), - ('\u{1e6c}', 0x1e6d), ('\u{1e6e}', 0x1e6f), ('\u{1e70}', 0x1e71), ('\u{1e72}', 0x1e73), - ('\u{1e74}', 0x1e75), ('\u{1e76}', 0x1e77), ('\u{1e78}', 0x1e79), ('\u{1e7a}', 0x1e7b), - ('\u{1e7c}', 0x1e7d), ('\u{1e7e}', 0x1e7f), ('\u{1e80}', 0x1e81), ('\u{1e82}', 0x1e83), - ('\u{1e84}', 0x1e85), ('\u{1e86}', 0x1e87), ('\u{1e88}', 0x1e89), ('\u{1e8a}', 0x1e8b), - ('\u{1e8c}', 0x1e8d), ('\u{1e8e}', 0x1e8f), ('\u{1e90}', 0x1e91), ('\u{1e92}', 0x1e93), - ('\u{1e94}', 0x1e95), ('\u{1e9e}', 0xdf), ('\u{1ea0}', 0x1ea1), ('\u{1ea2}', 0x1ea3), - ('\u{1ea4}', 0x1ea5), ('\u{1ea6}', 0x1ea7), ('\u{1ea8}', 0x1ea9), ('\u{1eaa}', 0x1eab), - ('\u{1eac}', 0x1ead), ('\u{1eae}', 0x1eaf), ('\u{1eb0}', 0x1eb1), ('\u{1eb2}', 0x1eb3), - ('\u{1eb4}', 0x1eb5), ('\u{1eb6}', 0x1eb7), ('\u{1eb8}', 0x1eb9), ('\u{1eba}', 0x1ebb), - ('\u{1ebc}', 0x1ebd), ('\u{1ebe}', 0x1ebf), ('\u{1ec0}', 0x1ec1), ('\u{1ec2}', 0x1ec3), - ('\u{1ec4}', 0x1ec5), ('\u{1ec6}', 0x1ec7), 
('\u{1ec8}', 0x1ec9), ('\u{1eca}', 0x1ecb), - ('\u{1ecc}', 0x1ecd), ('\u{1ece}', 0x1ecf), ('\u{1ed0}', 0x1ed1), ('\u{1ed2}', 0x1ed3), - ('\u{1ed4}', 0x1ed5), ('\u{1ed6}', 0x1ed7), ('\u{1ed8}', 0x1ed9), ('\u{1eda}', 0x1edb), - ('\u{1edc}', 0x1edd), ('\u{1ede}', 0x1edf), ('\u{1ee0}', 0x1ee1), ('\u{1ee2}', 0x1ee3), - ('\u{1ee4}', 0x1ee5), ('\u{1ee6}', 0x1ee7), ('\u{1ee8}', 0x1ee9), ('\u{1eea}', 0x1eeb), - ('\u{1eec}', 0x1eed), ('\u{1eee}', 0x1eef), ('\u{1ef0}', 0x1ef1), ('\u{1ef2}', 0x1ef3), - ('\u{1ef4}', 0x1ef5), ('\u{1ef6}', 0x1ef7), ('\u{1ef8}', 0x1ef9), ('\u{1efa}', 0x1efb), - ('\u{1efc}', 0x1efd), ('\u{1efe}', 0x1eff), ('\u{1f08}', 0x1f00), ('\u{1f09}', 0x1f01), - ('\u{1f0a}', 0x1f02), ('\u{1f0b}', 0x1f03), ('\u{1f0c}', 0x1f04), ('\u{1f0d}', 0x1f05), - ('\u{1f0e}', 0x1f06), ('\u{1f0f}', 0x1f07), ('\u{1f18}', 0x1f10), ('\u{1f19}', 0x1f11), - ('\u{1f1a}', 0x1f12), ('\u{1f1b}', 0x1f13), ('\u{1f1c}', 0x1f14), ('\u{1f1d}', 0x1f15), - ('\u{1f28}', 0x1f20), ('\u{1f29}', 0x1f21), ('\u{1f2a}', 0x1f22), ('\u{1f2b}', 0x1f23), - ('\u{1f2c}', 0x1f24), ('\u{1f2d}', 0x1f25), ('\u{1f2e}', 0x1f26), ('\u{1f2f}', 0x1f27), - ('\u{1f38}', 0x1f30), ('\u{1f39}', 0x1f31), ('\u{1f3a}', 0x1f32), ('\u{1f3b}', 0x1f33), - ('\u{1f3c}', 0x1f34), ('\u{1f3d}', 0x1f35), ('\u{1f3e}', 0x1f36), ('\u{1f3f}', 0x1f37), - ('\u{1f48}', 0x1f40), ('\u{1f49}', 0x1f41), ('\u{1f4a}', 0x1f42), ('\u{1f4b}', 0x1f43), - ('\u{1f4c}', 0x1f44), ('\u{1f4d}', 0x1f45), ('\u{1f59}', 0x1f51), ('\u{1f5b}', 0x1f53), - ('\u{1f5d}', 0x1f55), ('\u{1f5f}', 0x1f57), ('\u{1f68}', 0x1f60), ('\u{1f69}', 0x1f61), - ('\u{1f6a}', 0x1f62), ('\u{1f6b}', 0x1f63), ('\u{1f6c}', 0x1f64), ('\u{1f6d}', 0x1f65), - ('\u{1f6e}', 0x1f66), ('\u{1f6f}', 0x1f67), ('\u{1f88}', 0x1f80), ('\u{1f89}', 0x1f81), - ('\u{1f8a}', 0x1f82), ('\u{1f8b}', 0x1f83), ('\u{1f8c}', 0x1f84), ('\u{1f8d}', 0x1f85), - ('\u{1f8e}', 0x1f86), ('\u{1f8f}', 0x1f87), ('\u{1f98}', 0x1f90), ('\u{1f99}', 0x1f91), - ('\u{1f9a}', 0x1f92), ('\u{1f9b}', 0x1f93), ('\u{1f9c}', 0x1f94), ('\u{1f9d}', 0x1f95), - ('\u{1f9e}', 0x1f96), ('\u{1f9f}', 0x1f97), ('\u{1fa8}', 0x1fa0), ('\u{1fa9}', 0x1fa1), - ('\u{1faa}', 0x1fa2), ('\u{1fab}', 0x1fa3), ('\u{1fac}', 0x1fa4), ('\u{1fad}', 0x1fa5), - ('\u{1fae}', 0x1fa6), ('\u{1faf}', 0x1fa7), ('\u{1fb8}', 0x1fb0), ('\u{1fb9}', 0x1fb1), - ('\u{1fba}', 0x1f70), ('\u{1fbb}', 0x1f71), ('\u{1fbc}', 0x1fb3), ('\u{1fc8}', 0x1f72), - ('\u{1fc9}', 0x1f73), ('\u{1fca}', 0x1f74), ('\u{1fcb}', 0x1f75), ('\u{1fcc}', 0x1fc3), - ('\u{1fd8}', 0x1fd0), ('\u{1fd9}', 0x1fd1), ('\u{1fda}', 0x1f76), ('\u{1fdb}', 0x1f77), - ('\u{1fe8}', 0x1fe0), ('\u{1fe9}', 0x1fe1), ('\u{1fea}', 0x1f7a), ('\u{1feb}', 0x1f7b), - ('\u{1fec}', 0x1fe5), ('\u{1ff8}', 0x1f78), ('\u{1ff9}', 0x1f79), ('\u{1ffa}', 0x1f7c), - ('\u{1ffb}', 0x1f7d), ('\u{1ffc}', 0x1ff3), ('\u{2126}', 0x3c9), ('\u{212a}', 0x6b), - ('\u{212b}', 0xe5), ('\u{2132}', 0x214e), ('\u{2160}', 0x2170), ('\u{2161}', 0x2171), - ('\u{2162}', 0x2172), ('\u{2163}', 0x2173), ('\u{2164}', 0x2174), ('\u{2165}', 0x2175), - ('\u{2166}', 0x2176), ('\u{2167}', 0x2177), ('\u{2168}', 0x2178), ('\u{2169}', 0x2179), - ('\u{216a}', 0x217a), ('\u{216b}', 0x217b), ('\u{216c}', 0x217c), ('\u{216d}', 0x217d), - ('\u{216e}', 0x217e), ('\u{216f}', 0x217f), ('\u{2183}', 0x2184), ('\u{24b6}', 0x24d0), - ('\u{24b7}', 0x24d1), ('\u{24b8}', 0x24d2), ('\u{24b9}', 0x24d3), ('\u{24ba}', 0x24d4), - ('\u{24bb}', 0x24d5), ('\u{24bc}', 0x24d6), ('\u{24bd}', 0x24d7), ('\u{24be}', 0x24d8), - ('\u{24bf}', 0x24d9), ('\u{24c0}', 0x24da), ('\u{24c1}', 0x24db), ('\u{24c2}', 0x24dc), - 
('\u{24c3}', 0x24dd), ('\u{24c4}', 0x24de), ('\u{24c5}', 0x24df), ('\u{24c6}', 0x24e0), - ('\u{24c7}', 0x24e1), ('\u{24c8}', 0x24e2), ('\u{24c9}', 0x24e3), ('\u{24ca}', 0x24e4), - ('\u{24cb}', 0x24e5), ('\u{24cc}', 0x24e6), ('\u{24cd}', 0x24e7), ('\u{24ce}', 0x24e8), - ('\u{24cf}', 0x24e9), ('\u{2c00}', 0x2c30), ('\u{2c01}', 0x2c31), ('\u{2c02}', 0x2c32), - ('\u{2c03}', 0x2c33), ('\u{2c04}', 0x2c34), ('\u{2c05}', 0x2c35), ('\u{2c06}', 0x2c36), - ('\u{2c07}', 0x2c37), ('\u{2c08}', 0x2c38), ('\u{2c09}', 0x2c39), ('\u{2c0a}', 0x2c3a), - ('\u{2c0b}', 0x2c3b), ('\u{2c0c}', 0x2c3c), ('\u{2c0d}', 0x2c3d), ('\u{2c0e}', 0x2c3e), - ('\u{2c0f}', 0x2c3f), ('\u{2c10}', 0x2c40), ('\u{2c11}', 0x2c41), ('\u{2c12}', 0x2c42), - ('\u{2c13}', 0x2c43), ('\u{2c14}', 0x2c44), ('\u{2c15}', 0x2c45), ('\u{2c16}', 0x2c46), - ('\u{2c17}', 0x2c47), ('\u{2c18}', 0x2c48), ('\u{2c19}', 0x2c49), ('\u{2c1a}', 0x2c4a), - ('\u{2c1b}', 0x2c4b), ('\u{2c1c}', 0x2c4c), ('\u{2c1d}', 0x2c4d), ('\u{2c1e}', 0x2c4e), - ('\u{2c1f}', 0x2c4f), ('\u{2c20}', 0x2c50), ('\u{2c21}', 0x2c51), ('\u{2c22}', 0x2c52), - ('\u{2c23}', 0x2c53), ('\u{2c24}', 0x2c54), ('\u{2c25}', 0x2c55), ('\u{2c26}', 0x2c56), - ('\u{2c27}', 0x2c57), ('\u{2c28}', 0x2c58), ('\u{2c29}', 0x2c59), ('\u{2c2a}', 0x2c5a), - ('\u{2c2b}', 0x2c5b), ('\u{2c2c}', 0x2c5c), ('\u{2c2d}', 0x2c5d), ('\u{2c2e}', 0x2c5e), - ('\u{2c2f}', 0x2c5f), ('\u{2c60}', 0x2c61), ('\u{2c62}', 0x26b), ('\u{2c63}', 0x1d7d), - ('\u{2c64}', 0x27d), ('\u{2c67}', 0x2c68), ('\u{2c69}', 0x2c6a), ('\u{2c6b}', 0x2c6c), - ('\u{2c6d}', 0x251), ('\u{2c6e}', 0x271), ('\u{2c6f}', 0x250), ('\u{2c70}', 0x252), - ('\u{2c72}', 0x2c73), ('\u{2c75}', 0x2c76), ('\u{2c7e}', 0x23f), ('\u{2c7f}', 0x240), - ('\u{2c80}', 0x2c81), ('\u{2c82}', 0x2c83), ('\u{2c84}', 0x2c85), ('\u{2c86}', 0x2c87), - ('\u{2c88}', 0x2c89), ('\u{2c8a}', 0x2c8b), ('\u{2c8c}', 0x2c8d), ('\u{2c8e}', 0x2c8f), - ('\u{2c90}', 0x2c91), ('\u{2c92}', 0x2c93), ('\u{2c94}', 0x2c95), ('\u{2c96}', 0x2c97), - ('\u{2c98}', 0x2c99), ('\u{2c9a}', 0x2c9b), ('\u{2c9c}', 0x2c9d), ('\u{2c9e}', 0x2c9f), - ('\u{2ca0}', 0x2ca1), ('\u{2ca2}', 0x2ca3), ('\u{2ca4}', 0x2ca5), ('\u{2ca6}', 0x2ca7), - ('\u{2ca8}', 0x2ca9), ('\u{2caa}', 0x2cab), ('\u{2cac}', 0x2cad), ('\u{2cae}', 0x2caf), - ('\u{2cb0}', 0x2cb1), ('\u{2cb2}', 0x2cb3), ('\u{2cb4}', 0x2cb5), ('\u{2cb6}', 0x2cb7), - ('\u{2cb8}', 0x2cb9), ('\u{2cba}', 0x2cbb), ('\u{2cbc}', 0x2cbd), ('\u{2cbe}', 0x2cbf), - ('\u{2cc0}', 0x2cc1), ('\u{2cc2}', 0x2cc3), ('\u{2cc4}', 0x2cc5), ('\u{2cc6}', 0x2cc7), - ('\u{2cc8}', 0x2cc9), ('\u{2cca}', 0x2ccb), ('\u{2ccc}', 0x2ccd), ('\u{2cce}', 0x2ccf), - ('\u{2cd0}', 0x2cd1), ('\u{2cd2}', 0x2cd3), ('\u{2cd4}', 0x2cd5), ('\u{2cd6}', 0x2cd7), - ('\u{2cd8}', 0x2cd9), ('\u{2cda}', 0x2cdb), ('\u{2cdc}', 0x2cdd), ('\u{2cde}', 0x2cdf), - ('\u{2ce0}', 0x2ce1), ('\u{2ce2}', 0x2ce3), ('\u{2ceb}', 0x2cec), ('\u{2ced}', 0x2cee), - ('\u{2cf2}', 0x2cf3), ('\u{a640}', 0xa641), ('\u{a642}', 0xa643), ('\u{a644}', 0xa645), - ('\u{a646}', 0xa647), ('\u{a648}', 0xa649), ('\u{a64a}', 0xa64b), ('\u{a64c}', 0xa64d), - ('\u{a64e}', 0xa64f), ('\u{a650}', 0xa651), ('\u{a652}', 0xa653), ('\u{a654}', 0xa655), - ('\u{a656}', 0xa657), ('\u{a658}', 0xa659), ('\u{a65a}', 0xa65b), ('\u{a65c}', 0xa65d), - ('\u{a65e}', 0xa65f), ('\u{a660}', 0xa661), ('\u{a662}', 0xa663), ('\u{a664}', 0xa665), - ('\u{a666}', 0xa667), ('\u{a668}', 0xa669), ('\u{a66a}', 0xa66b), ('\u{a66c}', 0xa66d), - ('\u{a680}', 0xa681), ('\u{a682}', 0xa683), ('\u{a684}', 0xa685), ('\u{a686}', 0xa687), - ('\u{a688}', 0xa689), ('\u{a68a}', 0xa68b), 
('\u{a68c}', 0xa68d), ('\u{a68e}', 0xa68f), - ('\u{a690}', 0xa691), ('\u{a692}', 0xa693), ('\u{a694}', 0xa695), ('\u{a696}', 0xa697), - ('\u{a698}', 0xa699), ('\u{a69a}', 0xa69b), ('\u{a722}', 0xa723), ('\u{a724}', 0xa725), - ('\u{a726}', 0xa727), ('\u{a728}', 0xa729), ('\u{a72a}', 0xa72b), ('\u{a72c}', 0xa72d), - ('\u{a72e}', 0xa72f), ('\u{a732}', 0xa733), ('\u{a734}', 0xa735), ('\u{a736}', 0xa737), - ('\u{a738}', 0xa739), ('\u{a73a}', 0xa73b), ('\u{a73c}', 0xa73d), ('\u{a73e}', 0xa73f), - ('\u{a740}', 0xa741), ('\u{a742}', 0xa743), ('\u{a744}', 0xa745), ('\u{a746}', 0xa747), - ('\u{a748}', 0xa749), ('\u{a74a}', 0xa74b), ('\u{a74c}', 0xa74d), ('\u{a74e}', 0xa74f), - ('\u{a750}', 0xa751), ('\u{a752}', 0xa753), ('\u{a754}', 0xa755), ('\u{a756}', 0xa757), - ('\u{a758}', 0xa759), ('\u{a75a}', 0xa75b), ('\u{a75c}', 0xa75d), ('\u{a75e}', 0xa75f), - ('\u{a760}', 0xa761), ('\u{a762}', 0xa763), ('\u{a764}', 0xa765), ('\u{a766}', 0xa767), - ('\u{a768}', 0xa769), ('\u{a76a}', 0xa76b), ('\u{a76c}', 0xa76d), ('\u{a76e}', 0xa76f), - ('\u{a779}', 0xa77a), ('\u{a77b}', 0xa77c), ('\u{a77d}', 0x1d79), ('\u{a77e}', 0xa77f), - ('\u{a780}', 0xa781), ('\u{a782}', 0xa783), ('\u{a784}', 0xa785), ('\u{a786}', 0xa787), - ('\u{a78b}', 0xa78c), ('\u{a78d}', 0x265), ('\u{a790}', 0xa791), ('\u{a792}', 0xa793), - ('\u{a796}', 0xa797), ('\u{a798}', 0xa799), ('\u{a79a}', 0xa79b), ('\u{a79c}', 0xa79d), - ('\u{a79e}', 0xa79f), ('\u{a7a0}', 0xa7a1), ('\u{a7a2}', 0xa7a3), ('\u{a7a4}', 0xa7a5), - ('\u{a7a6}', 0xa7a7), ('\u{a7a8}', 0xa7a9), ('\u{a7aa}', 0x266), ('\u{a7ab}', 0x25c), - ('\u{a7ac}', 0x261), ('\u{a7ad}', 0x26c), ('\u{a7ae}', 0x26a), ('\u{a7b0}', 0x29e), - ('\u{a7b1}', 0x287), ('\u{a7b2}', 0x29d), ('\u{a7b3}', 0xab53), ('\u{a7b4}', 0xa7b5), - ('\u{a7b6}', 0xa7b7), ('\u{a7b8}', 0xa7b9), ('\u{a7ba}', 0xa7bb), ('\u{a7bc}', 0xa7bd), - ('\u{a7be}', 0xa7bf), ('\u{a7c0}', 0xa7c1), ('\u{a7c2}', 0xa7c3), ('\u{a7c4}', 0xa794), - ('\u{a7c5}', 0x282), ('\u{a7c6}', 0x1d8e), ('\u{a7c7}', 0xa7c8), ('\u{a7c9}', 0xa7ca), - ('\u{a7cb}', 0x264), ('\u{a7cc}', 0xa7cd), ('\u{a7ce}', 0xa7cf), ('\u{a7d0}', 0xa7d1), - ('\u{a7d2}', 0xa7d3), ('\u{a7d4}', 0xa7d5), ('\u{a7d6}', 0xa7d7), ('\u{a7d8}', 0xa7d9), - ('\u{a7da}', 0xa7db), ('\u{a7dc}', 0x19b), ('\u{a7f5}', 0xa7f6), ('\u{ff21}', 0xff41), - ('\u{ff22}', 0xff42), ('\u{ff23}', 0xff43), ('\u{ff24}', 0xff44), ('\u{ff25}', 0xff45), - ('\u{ff26}', 0xff46), ('\u{ff27}', 0xff47), ('\u{ff28}', 0xff48), ('\u{ff29}', 0xff49), - ('\u{ff2a}', 0xff4a), ('\u{ff2b}', 0xff4b), ('\u{ff2c}', 0xff4c), ('\u{ff2d}', 0xff4d), - ('\u{ff2e}', 0xff4e), ('\u{ff2f}', 0xff4f), ('\u{ff30}', 0xff50), ('\u{ff31}', 0xff51), - ('\u{ff32}', 0xff52), ('\u{ff33}', 0xff53), ('\u{ff34}', 0xff54), ('\u{ff35}', 0xff55), - ('\u{ff36}', 0xff56), ('\u{ff37}', 0xff57), ('\u{ff38}', 0xff58), ('\u{ff39}', 0xff59), - ('\u{ff3a}', 0xff5a), ('\u{10400}', 0x10428), ('\u{10401}', 0x10429), - ('\u{10402}', 0x1042a), ('\u{10403}', 0x1042b), ('\u{10404}', 0x1042c), - ('\u{10405}', 0x1042d), ('\u{10406}', 0x1042e), ('\u{10407}', 0x1042f), - ('\u{10408}', 0x10430), ('\u{10409}', 0x10431), ('\u{1040a}', 0x10432), - ('\u{1040b}', 0x10433), ('\u{1040c}', 0x10434), ('\u{1040d}', 0x10435), - ('\u{1040e}', 0x10436), ('\u{1040f}', 0x10437), ('\u{10410}', 0x10438), - ('\u{10411}', 0x10439), ('\u{10412}', 0x1043a), ('\u{10413}', 0x1043b), - ('\u{10414}', 0x1043c), ('\u{10415}', 0x1043d), ('\u{10416}', 0x1043e), - ('\u{10417}', 0x1043f), ('\u{10418}', 0x10440), ('\u{10419}', 0x10441), - ('\u{1041a}', 0x10442), ('\u{1041b}', 0x10443), ('\u{1041c}', 
0x10444), - ('\u{1041d}', 0x10445), ('\u{1041e}', 0x10446), ('\u{1041f}', 0x10447), - ('\u{10420}', 0x10448), ('\u{10421}', 0x10449), ('\u{10422}', 0x1044a), - ('\u{10423}', 0x1044b), ('\u{10424}', 0x1044c), ('\u{10425}', 0x1044d), - ('\u{10426}', 0x1044e), ('\u{10427}', 0x1044f), ('\u{104b0}', 0x104d8), - ('\u{104b1}', 0x104d9), ('\u{104b2}', 0x104da), ('\u{104b3}', 0x104db), - ('\u{104b4}', 0x104dc), ('\u{104b5}', 0x104dd), ('\u{104b6}', 0x104de), - ('\u{104b7}', 0x104df), ('\u{104b8}', 0x104e0), ('\u{104b9}', 0x104e1), - ('\u{104ba}', 0x104e2), ('\u{104bb}', 0x104e3), ('\u{104bc}', 0x104e4), - ('\u{104bd}', 0x104e5), ('\u{104be}', 0x104e6), ('\u{104bf}', 0x104e7), - ('\u{104c0}', 0x104e8), ('\u{104c1}', 0x104e9), ('\u{104c2}', 0x104ea), - ('\u{104c3}', 0x104eb), ('\u{104c4}', 0x104ec), ('\u{104c5}', 0x104ed), - ('\u{104c6}', 0x104ee), ('\u{104c7}', 0x104ef), ('\u{104c8}', 0x104f0), - ('\u{104c9}', 0x104f1), ('\u{104ca}', 0x104f2), ('\u{104cb}', 0x104f3), - ('\u{104cc}', 0x104f4), ('\u{104cd}', 0x104f5), ('\u{104ce}', 0x104f6), - ('\u{104cf}', 0x104f7), ('\u{104d0}', 0x104f8), ('\u{104d1}', 0x104f9), - ('\u{104d2}', 0x104fa), ('\u{104d3}', 0x104fb), ('\u{10570}', 0x10597), - ('\u{10571}', 0x10598), ('\u{10572}', 0x10599), ('\u{10573}', 0x1059a), - ('\u{10574}', 0x1059b), ('\u{10575}', 0x1059c), ('\u{10576}', 0x1059d), - ('\u{10577}', 0x1059e), ('\u{10578}', 0x1059f), ('\u{10579}', 0x105a0), - ('\u{1057a}', 0x105a1), ('\u{1057c}', 0x105a3), ('\u{1057d}', 0x105a4), - ('\u{1057e}', 0x105a5), ('\u{1057f}', 0x105a6), ('\u{10580}', 0x105a7), - ('\u{10581}', 0x105a8), ('\u{10582}', 0x105a9), ('\u{10583}', 0x105aa), - ('\u{10584}', 0x105ab), ('\u{10585}', 0x105ac), ('\u{10586}', 0x105ad), - ('\u{10587}', 0x105ae), ('\u{10588}', 0x105af), ('\u{10589}', 0x105b0), - ('\u{1058a}', 0x105b1), ('\u{1058c}', 0x105b3), ('\u{1058d}', 0x105b4), - ('\u{1058e}', 0x105b5), ('\u{1058f}', 0x105b6), ('\u{10590}', 0x105b7), - ('\u{10591}', 0x105b8), ('\u{10592}', 0x105b9), ('\u{10594}', 0x105bb), - ('\u{10595}', 0x105bc), ('\u{10c80}', 0x10cc0), ('\u{10c81}', 0x10cc1), - ('\u{10c82}', 0x10cc2), ('\u{10c83}', 0x10cc3), ('\u{10c84}', 0x10cc4), - ('\u{10c85}', 0x10cc5), ('\u{10c86}', 0x10cc6), ('\u{10c87}', 0x10cc7), - ('\u{10c88}', 0x10cc8), ('\u{10c89}', 0x10cc9), ('\u{10c8a}', 0x10cca), - ('\u{10c8b}', 0x10ccb), ('\u{10c8c}', 0x10ccc), ('\u{10c8d}', 0x10ccd), - ('\u{10c8e}', 0x10cce), ('\u{10c8f}', 0x10ccf), ('\u{10c90}', 0x10cd0), - ('\u{10c91}', 0x10cd1), ('\u{10c92}', 0x10cd2), ('\u{10c93}', 0x10cd3), - ('\u{10c94}', 0x10cd4), ('\u{10c95}', 0x10cd5), ('\u{10c96}', 0x10cd6), - ('\u{10c97}', 0x10cd7), ('\u{10c98}', 0x10cd8), ('\u{10c99}', 0x10cd9), - ('\u{10c9a}', 0x10cda), ('\u{10c9b}', 0x10cdb), ('\u{10c9c}', 0x10cdc), - ('\u{10c9d}', 0x10cdd), ('\u{10c9e}', 0x10cde), ('\u{10c9f}', 0x10cdf), - ('\u{10ca0}', 0x10ce0), ('\u{10ca1}', 0x10ce1), ('\u{10ca2}', 0x10ce2), - ('\u{10ca3}', 0x10ce3), ('\u{10ca4}', 0x10ce4), ('\u{10ca5}', 0x10ce5), - ('\u{10ca6}', 0x10ce6), ('\u{10ca7}', 0x10ce7), ('\u{10ca8}', 0x10ce8), - ('\u{10ca9}', 0x10ce9), ('\u{10caa}', 0x10cea), ('\u{10cab}', 0x10ceb), - ('\u{10cac}', 0x10cec), ('\u{10cad}', 0x10ced), ('\u{10cae}', 0x10cee), - ('\u{10caf}', 0x10cef), ('\u{10cb0}', 0x10cf0), ('\u{10cb1}', 0x10cf1), - ('\u{10cb2}', 0x10cf2), ('\u{10d50}', 0x10d70), ('\u{10d51}', 0x10d71), - ('\u{10d52}', 0x10d72), ('\u{10d53}', 0x10d73), ('\u{10d54}', 0x10d74), - ('\u{10d55}', 0x10d75), ('\u{10d56}', 0x10d76), ('\u{10d57}', 0x10d77), - ('\u{10d58}', 0x10d78), ('\u{10d59}', 0x10d79), ('\u{10d5a}', 
0x10d7a), - ('\u{10d5b}', 0x10d7b), ('\u{10d5c}', 0x10d7c), ('\u{10d5d}', 0x10d7d), - ('\u{10d5e}', 0x10d7e), ('\u{10d5f}', 0x10d7f), ('\u{10d60}', 0x10d80), - ('\u{10d61}', 0x10d81), ('\u{10d62}', 0x10d82), ('\u{10d63}', 0x10d83), - ('\u{10d64}', 0x10d84), ('\u{10d65}', 0x10d85), ('\u{118a0}', 0x118c0), - ('\u{118a1}', 0x118c1), ('\u{118a2}', 0x118c2), ('\u{118a3}', 0x118c3), - ('\u{118a4}', 0x118c4), ('\u{118a5}', 0x118c5), ('\u{118a6}', 0x118c6), - ('\u{118a7}', 0x118c7), ('\u{118a8}', 0x118c8), ('\u{118a9}', 0x118c9), - ('\u{118aa}', 0x118ca), ('\u{118ab}', 0x118cb), ('\u{118ac}', 0x118cc), - ('\u{118ad}', 0x118cd), ('\u{118ae}', 0x118ce), ('\u{118af}', 0x118cf), - ('\u{118b0}', 0x118d0), ('\u{118b1}', 0x118d1), ('\u{118b2}', 0x118d2), - ('\u{118b3}', 0x118d3), ('\u{118b4}', 0x118d4), ('\u{118b5}', 0x118d5), - ('\u{118b6}', 0x118d6), ('\u{118b7}', 0x118d7), ('\u{118b8}', 0x118d8), - ('\u{118b9}', 0x118d9), ('\u{118ba}', 0x118da), ('\u{118bb}', 0x118db), - ('\u{118bc}', 0x118dc), ('\u{118bd}', 0x118dd), ('\u{118be}', 0x118de), - ('\u{118bf}', 0x118df), ('\u{16e40}', 0x16e60), ('\u{16e41}', 0x16e61), - ('\u{16e42}', 0x16e62), ('\u{16e43}', 0x16e63), ('\u{16e44}', 0x16e64), - ('\u{16e45}', 0x16e65), ('\u{16e46}', 0x16e66), ('\u{16e47}', 0x16e67), - ('\u{16e48}', 0x16e68), ('\u{16e49}', 0x16e69), ('\u{16e4a}', 0x16e6a), - ('\u{16e4b}', 0x16e6b), ('\u{16e4c}', 0x16e6c), ('\u{16e4d}', 0x16e6d), - ('\u{16e4e}', 0x16e6e), ('\u{16e4f}', 0x16e6f), ('\u{16e50}', 0x16e70), - ('\u{16e51}', 0x16e71), ('\u{16e52}', 0x16e72), ('\u{16e53}', 0x16e73), - ('\u{16e54}', 0x16e74), ('\u{16e55}', 0x16e75), ('\u{16e56}', 0x16e76), - ('\u{16e57}', 0x16e77), ('\u{16e58}', 0x16e78), ('\u{16e59}', 0x16e79), - ('\u{16e5a}', 0x16e7a), ('\u{16e5b}', 0x16e7b), ('\u{16e5c}', 0x16e7c), - ('\u{16e5d}', 0x16e7d), ('\u{16e5e}', 0x16e7e), ('\u{16e5f}', 0x16e7f), - ('\u{16ea0}', 0x16ebb), ('\u{16ea1}', 0x16ebc), ('\u{16ea2}', 0x16ebd), - ('\u{16ea3}', 0x16ebe), ('\u{16ea4}', 0x16ebf), ('\u{16ea5}', 0x16ec0), - ('\u{16ea6}', 0x16ec1), ('\u{16ea7}', 0x16ec2), ('\u{16ea8}', 0x16ec3), - ('\u{16ea9}', 0x16ec4), ('\u{16eaa}', 0x16ec5), ('\u{16eab}', 0x16ec6), - ('\u{16eac}', 0x16ec7), ('\u{16ead}', 0x16ec8), ('\u{16eae}', 0x16ec9), - ('\u{16eaf}', 0x16eca), ('\u{16eb0}', 0x16ecb), ('\u{16eb1}', 0x16ecc), - ('\u{16eb2}', 0x16ecd), ('\u{16eb3}', 0x16ece), ('\u{16eb4}', 0x16ecf), - ('\u{16eb5}', 0x16ed0), ('\u{16eb6}', 0x16ed1), ('\u{16eb7}', 0x16ed2), - ('\u{16eb8}', 0x16ed3), ('\u{1e900}', 0x1e922), ('\u{1e901}', 0x1e923), - ('\u{1e902}', 0x1e924), ('\u{1e903}', 0x1e925), ('\u{1e904}', 0x1e926), - ('\u{1e905}', 0x1e927), ('\u{1e906}', 0x1e928), ('\u{1e907}', 0x1e929), - ('\u{1e908}', 0x1e92a), ('\u{1e909}', 0x1e92b), ('\u{1e90a}', 0x1e92c), - ('\u{1e90b}', 0x1e92d), ('\u{1e90c}', 0x1e92e), ('\u{1e90d}', 0x1e92f), - ('\u{1e90e}', 0x1e930), ('\u{1e90f}', 0x1e931), ('\u{1e910}', 0x1e932), - ('\u{1e911}', 0x1e933), ('\u{1e912}', 0x1e934), ('\u{1e913}', 0x1e935), - ('\u{1e914}', 0x1e936), ('\u{1e915}', 0x1e937), ('\u{1e916}', 0x1e938), - ('\u{1e917}', 0x1e939), ('\u{1e918}', 0x1e93a), ('\u{1e919}', 0x1e93b), - ('\u{1e91a}', 0x1e93c), ('\u{1e91b}', 0x1e93d), ('\u{1e91c}', 0x1e93e), - ('\u{1e91d}', 0x1e93f), ('\u{1e91e}', 0x1e940), ('\u{1e91f}', 0x1e941), - ('\u{1e920}', 0x1e942), ('\u{1e921}', 0x1e943), - ]; - - #[rustfmt::skip] - static LOWERCASE_TABLE_MULTI: &[[char; 3]; 1] = &[ - ['\u{69}', '\u{307}', '\u{0}'], - ]; + const INDEX_MASK: u32 = 0x400000; - #[inline] pub fn to_lower(c: char) -> [char; 3] { - const { - let mut i = 0; - 
while i < LOWERCASE_TABLE.len() { - let (_, val) = LOWERCASE_TABLE[i]; - if val & (1 << 22) == 0 { - assert!(char::from_u32(val).is_some()); - } else { - let index = val & ((1 << 22) - 1); - assert!((index as usize) < LOWERCASE_TABLE_MULTI.len()); - } - i += 1; - } + if c.is_ascii() { + [(c as u8).to_ascii_lowercase() as char, '\0', '\0'] + } else { + LOWERCASE_TABLE + .binary_search_by(|&(key, _)| key.cmp(&c)) + .map(|i| { + let u = LOWERCASE_TABLE[i].1; + char::from_u32(u).map(|c| [c, '\0', '\0']).unwrap_or_else(|| { + // SAFETY: Index comes from statically generated table + unsafe { *LOWERCASE_TABLE_MULTI.get_unchecked((u & (INDEX_MASK - 1)) as usize) } + }) + }) + .unwrap_or([c, '\0', '\0']) } + } - // SAFETY: Just checked that the tables are valid - unsafe { - super::case_conversion( - c, - |c| c.to_ascii_lowercase(), - LOWERCASE_TABLE, - LOWERCASE_TABLE_MULTI, - ) + pub fn to_upper(c: char) -> [char; 3] { + if c.is_ascii() { + [(c as u8).to_ascii_uppercase() as char, '\0', '\0'] + } else { + UPPERCASE_TABLE + .binary_search_by(|&(key, _)| key.cmp(&c)) + .map(|i| { + let u = UPPERCASE_TABLE[i].1; + char::from_u32(u).map(|c| [c, '\0', '\0']).unwrap_or_else(|| { + // SAFETY: Index comes from statically generated table + unsafe { *UPPERCASE_TABLE_MULTI.get_unchecked((u & (INDEX_MASK - 1)) as usize) } + }) + }) + .unwrap_or([c, '\0', '\0']) } } - #[rustfmt::skip] + static LOWERCASE_TABLE: &[(char, u32); 1462] = &[ + ('\u{c0}', 224), ('\u{c1}', 225), ('\u{c2}', 226), ('\u{c3}', 227), ('\u{c4}', 228), + ('\u{c5}', 229), ('\u{c6}', 230), ('\u{c7}', 231), ('\u{c8}', 232), ('\u{c9}', 233), + ('\u{ca}', 234), ('\u{cb}', 235), ('\u{cc}', 236), ('\u{cd}', 237), ('\u{ce}', 238), + ('\u{cf}', 239), ('\u{d0}', 240), ('\u{d1}', 241), ('\u{d2}', 242), ('\u{d3}', 243), + ('\u{d4}', 244), ('\u{d5}', 245), ('\u{d6}', 246), ('\u{d8}', 248), ('\u{d9}', 249), + ('\u{da}', 250), ('\u{db}', 251), ('\u{dc}', 252), ('\u{dd}', 253), ('\u{de}', 254), + ('\u{100}', 257), ('\u{102}', 259), ('\u{104}', 261), ('\u{106}', 263), ('\u{108}', 265), + ('\u{10a}', 267), ('\u{10c}', 269), ('\u{10e}', 271), ('\u{110}', 273), ('\u{112}', 275), + ('\u{114}', 277), ('\u{116}', 279), ('\u{118}', 281), ('\u{11a}', 283), ('\u{11c}', 285), + ('\u{11e}', 287), ('\u{120}', 289), ('\u{122}', 291), ('\u{124}', 293), ('\u{126}', 295), + ('\u{128}', 297), ('\u{12a}', 299), ('\u{12c}', 301), ('\u{12e}', 303), + ('\u{130}', 4194304), ('\u{132}', 307), ('\u{134}', 309), ('\u{136}', 311), + ('\u{139}', 314), ('\u{13b}', 316), ('\u{13d}', 318), ('\u{13f}', 320), ('\u{141}', 322), + ('\u{143}', 324), ('\u{145}', 326), ('\u{147}', 328), ('\u{14a}', 331), ('\u{14c}', 333), + ('\u{14e}', 335), ('\u{150}', 337), ('\u{152}', 339), ('\u{154}', 341), ('\u{156}', 343), + ('\u{158}', 345), ('\u{15a}', 347), ('\u{15c}', 349), ('\u{15e}', 351), ('\u{160}', 353), + ('\u{162}', 355), ('\u{164}', 357), ('\u{166}', 359), ('\u{168}', 361), ('\u{16a}', 363), + ('\u{16c}', 365), ('\u{16e}', 367), ('\u{170}', 369), ('\u{172}', 371), ('\u{174}', 373), + ('\u{176}', 375), ('\u{178}', 255), ('\u{179}', 378), ('\u{17b}', 380), ('\u{17d}', 382), + ('\u{181}', 595), ('\u{182}', 387), ('\u{184}', 389), ('\u{186}', 596), ('\u{187}', 392), + ('\u{189}', 598), ('\u{18a}', 599), ('\u{18b}', 396), ('\u{18e}', 477), ('\u{18f}', 601), + ('\u{190}', 603), ('\u{191}', 402), ('\u{193}', 608), ('\u{194}', 611), ('\u{196}', 617), + ('\u{197}', 616), ('\u{198}', 409), ('\u{19c}', 623), ('\u{19d}', 626), ('\u{19f}', 629), + ('\u{1a0}', 417), ('\u{1a2}', 419), ('\u{1a4}', 421), 
('\u{1a6}', 640), ('\u{1a7}', 424), + ('\u{1a9}', 643), ('\u{1ac}', 429), ('\u{1ae}', 648), ('\u{1af}', 432), ('\u{1b1}', 650), + ('\u{1b2}', 651), ('\u{1b3}', 436), ('\u{1b5}', 438), ('\u{1b7}', 658), ('\u{1b8}', 441), + ('\u{1bc}', 445), ('\u{1c4}', 454), ('\u{1c5}', 454), ('\u{1c7}', 457), ('\u{1c8}', 457), + ('\u{1ca}', 460), ('\u{1cb}', 460), ('\u{1cd}', 462), ('\u{1cf}', 464), ('\u{1d1}', 466), + ('\u{1d3}', 468), ('\u{1d5}', 470), ('\u{1d7}', 472), ('\u{1d9}', 474), ('\u{1db}', 476), + ('\u{1de}', 479), ('\u{1e0}', 481), ('\u{1e2}', 483), ('\u{1e4}', 485), ('\u{1e6}', 487), + ('\u{1e8}', 489), ('\u{1ea}', 491), ('\u{1ec}', 493), ('\u{1ee}', 495), ('\u{1f1}', 499), + ('\u{1f2}', 499), ('\u{1f4}', 501), ('\u{1f6}', 405), ('\u{1f7}', 447), ('\u{1f8}', 505), + ('\u{1fa}', 507), ('\u{1fc}', 509), ('\u{1fe}', 511), ('\u{200}', 513), ('\u{202}', 515), + ('\u{204}', 517), ('\u{206}', 519), ('\u{208}', 521), ('\u{20a}', 523), ('\u{20c}', 525), + ('\u{20e}', 527), ('\u{210}', 529), ('\u{212}', 531), ('\u{214}', 533), ('\u{216}', 535), + ('\u{218}', 537), ('\u{21a}', 539), ('\u{21c}', 541), ('\u{21e}', 543), ('\u{220}', 414), + ('\u{222}', 547), ('\u{224}', 549), ('\u{226}', 551), ('\u{228}', 553), ('\u{22a}', 555), + ('\u{22c}', 557), ('\u{22e}', 559), ('\u{230}', 561), ('\u{232}', 563), ('\u{23a}', 11365), + ('\u{23b}', 572), ('\u{23d}', 410), ('\u{23e}', 11366), ('\u{241}', 578), ('\u{243}', 384), + ('\u{244}', 649), ('\u{245}', 652), ('\u{246}', 583), ('\u{248}', 585), ('\u{24a}', 587), + ('\u{24c}', 589), ('\u{24e}', 591), ('\u{370}', 881), ('\u{372}', 883), ('\u{376}', 887), + ('\u{37f}', 1011), ('\u{386}', 940), ('\u{388}', 941), ('\u{389}', 942), ('\u{38a}', 943), + ('\u{38c}', 972), ('\u{38e}', 973), ('\u{38f}', 974), ('\u{391}', 945), ('\u{392}', 946), + ('\u{393}', 947), ('\u{394}', 948), ('\u{395}', 949), ('\u{396}', 950), ('\u{397}', 951), + ('\u{398}', 952), ('\u{399}', 953), ('\u{39a}', 954), ('\u{39b}', 955), ('\u{39c}', 956), + ('\u{39d}', 957), ('\u{39e}', 958), ('\u{39f}', 959), ('\u{3a0}', 960), ('\u{3a1}', 961), + ('\u{3a3}', 963), ('\u{3a4}', 964), ('\u{3a5}', 965), ('\u{3a6}', 966), ('\u{3a7}', 967), + ('\u{3a8}', 968), ('\u{3a9}', 969), ('\u{3aa}', 970), ('\u{3ab}', 971), ('\u{3cf}', 983), + ('\u{3d8}', 985), ('\u{3da}', 987), ('\u{3dc}', 989), ('\u{3de}', 991), ('\u{3e0}', 993), + ('\u{3e2}', 995), ('\u{3e4}', 997), ('\u{3e6}', 999), ('\u{3e8}', 1001), ('\u{3ea}', 1003), + ('\u{3ec}', 1005), ('\u{3ee}', 1007), ('\u{3f4}', 952), ('\u{3f7}', 1016), + ('\u{3f9}', 1010), ('\u{3fa}', 1019), ('\u{3fd}', 891), ('\u{3fe}', 892), ('\u{3ff}', 893), + ('\u{400}', 1104), ('\u{401}', 1105), ('\u{402}', 1106), ('\u{403}', 1107), + ('\u{404}', 1108), ('\u{405}', 1109), ('\u{406}', 1110), ('\u{407}', 1111), + ('\u{408}', 1112), ('\u{409}', 1113), ('\u{40a}', 1114), ('\u{40b}', 1115), + ('\u{40c}', 1116), ('\u{40d}', 1117), ('\u{40e}', 1118), ('\u{40f}', 1119), + ('\u{410}', 1072), ('\u{411}', 1073), ('\u{412}', 1074), ('\u{413}', 1075), + ('\u{414}', 1076), ('\u{415}', 1077), ('\u{416}', 1078), ('\u{417}', 1079), + ('\u{418}', 1080), ('\u{419}', 1081), ('\u{41a}', 1082), ('\u{41b}', 1083), + ('\u{41c}', 1084), ('\u{41d}', 1085), ('\u{41e}', 1086), ('\u{41f}', 1087), + ('\u{420}', 1088), ('\u{421}', 1089), ('\u{422}', 1090), ('\u{423}', 1091), + ('\u{424}', 1092), ('\u{425}', 1093), ('\u{426}', 1094), ('\u{427}', 1095), + ('\u{428}', 1096), ('\u{429}', 1097), ('\u{42a}', 1098), ('\u{42b}', 1099), + ('\u{42c}', 1100), ('\u{42d}', 1101), ('\u{42e}', 1102), ('\u{42f}', 1103), + 
('\u{460}', 1121), ('\u{462}', 1123), ('\u{464}', 1125), ('\u{466}', 1127), + ('\u{468}', 1129), ('\u{46a}', 1131), ('\u{46c}', 1133), ('\u{46e}', 1135), + ('\u{470}', 1137), ('\u{472}', 1139), ('\u{474}', 1141), ('\u{476}', 1143), + ('\u{478}', 1145), ('\u{47a}', 1147), ('\u{47c}', 1149), ('\u{47e}', 1151), + ('\u{480}', 1153), ('\u{48a}', 1163), ('\u{48c}', 1165), ('\u{48e}', 1167), + ('\u{490}', 1169), ('\u{492}', 1171), ('\u{494}', 1173), ('\u{496}', 1175), + ('\u{498}', 1177), ('\u{49a}', 1179), ('\u{49c}', 1181), ('\u{49e}', 1183), + ('\u{4a0}', 1185), ('\u{4a2}', 1187), ('\u{4a4}', 1189), ('\u{4a6}', 1191), + ('\u{4a8}', 1193), ('\u{4aa}', 1195), ('\u{4ac}', 1197), ('\u{4ae}', 1199), + ('\u{4b0}', 1201), ('\u{4b2}', 1203), ('\u{4b4}', 1205), ('\u{4b6}', 1207), + ('\u{4b8}', 1209), ('\u{4ba}', 1211), ('\u{4bc}', 1213), ('\u{4be}', 1215), + ('\u{4c0}', 1231), ('\u{4c1}', 1218), ('\u{4c3}', 1220), ('\u{4c5}', 1222), + ('\u{4c7}', 1224), ('\u{4c9}', 1226), ('\u{4cb}', 1228), ('\u{4cd}', 1230), + ('\u{4d0}', 1233), ('\u{4d2}', 1235), ('\u{4d4}', 1237), ('\u{4d6}', 1239), + ('\u{4d8}', 1241), ('\u{4da}', 1243), ('\u{4dc}', 1245), ('\u{4de}', 1247), + ('\u{4e0}', 1249), ('\u{4e2}', 1251), ('\u{4e4}', 1253), ('\u{4e6}', 1255), + ('\u{4e8}', 1257), ('\u{4ea}', 1259), ('\u{4ec}', 1261), ('\u{4ee}', 1263), + ('\u{4f0}', 1265), ('\u{4f2}', 1267), ('\u{4f4}', 1269), ('\u{4f6}', 1271), + ('\u{4f8}', 1273), ('\u{4fa}', 1275), ('\u{4fc}', 1277), ('\u{4fe}', 1279), + ('\u{500}', 1281), ('\u{502}', 1283), ('\u{504}', 1285), ('\u{506}', 1287), + ('\u{508}', 1289), ('\u{50a}', 1291), ('\u{50c}', 1293), ('\u{50e}', 1295), + ('\u{510}', 1297), ('\u{512}', 1299), ('\u{514}', 1301), ('\u{516}', 1303), + ('\u{518}', 1305), ('\u{51a}', 1307), ('\u{51c}', 1309), ('\u{51e}', 1311), + ('\u{520}', 1313), ('\u{522}', 1315), ('\u{524}', 1317), ('\u{526}', 1319), + ('\u{528}', 1321), ('\u{52a}', 1323), ('\u{52c}', 1325), ('\u{52e}', 1327), + ('\u{531}', 1377), ('\u{532}', 1378), ('\u{533}', 1379), ('\u{534}', 1380), + ('\u{535}', 1381), ('\u{536}', 1382), ('\u{537}', 1383), ('\u{538}', 1384), + ('\u{539}', 1385), ('\u{53a}', 1386), ('\u{53b}', 1387), ('\u{53c}', 1388), + ('\u{53d}', 1389), ('\u{53e}', 1390), ('\u{53f}', 1391), ('\u{540}', 1392), + ('\u{541}', 1393), ('\u{542}', 1394), ('\u{543}', 1395), ('\u{544}', 1396), + ('\u{545}', 1397), ('\u{546}', 1398), ('\u{547}', 1399), ('\u{548}', 1400), + ('\u{549}', 1401), ('\u{54a}', 1402), ('\u{54b}', 1403), ('\u{54c}', 1404), + ('\u{54d}', 1405), ('\u{54e}', 1406), ('\u{54f}', 1407), ('\u{550}', 1408), + ('\u{551}', 1409), ('\u{552}', 1410), ('\u{553}', 1411), ('\u{554}', 1412), + ('\u{555}', 1413), ('\u{556}', 1414), ('\u{10a0}', 11520), ('\u{10a1}', 11521), + ('\u{10a2}', 11522), ('\u{10a3}', 11523), ('\u{10a4}', 11524), ('\u{10a5}', 11525), + ('\u{10a6}', 11526), ('\u{10a7}', 11527), ('\u{10a8}', 11528), ('\u{10a9}', 11529), + ('\u{10aa}', 11530), ('\u{10ab}', 11531), ('\u{10ac}', 11532), ('\u{10ad}', 11533), + ('\u{10ae}', 11534), ('\u{10af}', 11535), ('\u{10b0}', 11536), ('\u{10b1}', 11537), + ('\u{10b2}', 11538), ('\u{10b3}', 11539), ('\u{10b4}', 11540), ('\u{10b5}', 11541), + ('\u{10b6}', 11542), ('\u{10b7}', 11543), ('\u{10b8}', 11544), ('\u{10b9}', 11545), + ('\u{10ba}', 11546), ('\u{10bb}', 11547), ('\u{10bc}', 11548), ('\u{10bd}', 11549), + ('\u{10be}', 11550), ('\u{10bf}', 11551), ('\u{10c0}', 11552), ('\u{10c1}', 11553), + ('\u{10c2}', 11554), ('\u{10c3}', 11555), ('\u{10c4}', 11556), ('\u{10c5}', 11557), + ('\u{10c7}', 11559), ('\u{10cd}', 11565), 
('\u{13a0}', 43888), ('\u{13a1}', 43889), + ('\u{13a2}', 43890), ('\u{13a3}', 43891), ('\u{13a4}', 43892), ('\u{13a5}', 43893), + ('\u{13a6}', 43894), ('\u{13a7}', 43895), ('\u{13a8}', 43896), ('\u{13a9}', 43897), + ('\u{13aa}', 43898), ('\u{13ab}', 43899), ('\u{13ac}', 43900), ('\u{13ad}', 43901), + ('\u{13ae}', 43902), ('\u{13af}', 43903), ('\u{13b0}', 43904), ('\u{13b1}', 43905), + ('\u{13b2}', 43906), ('\u{13b3}', 43907), ('\u{13b4}', 43908), ('\u{13b5}', 43909), + ('\u{13b6}', 43910), ('\u{13b7}', 43911), ('\u{13b8}', 43912), ('\u{13b9}', 43913), + ('\u{13ba}', 43914), ('\u{13bb}', 43915), ('\u{13bc}', 43916), ('\u{13bd}', 43917), + ('\u{13be}', 43918), ('\u{13bf}', 43919), ('\u{13c0}', 43920), ('\u{13c1}', 43921), + ('\u{13c2}', 43922), ('\u{13c3}', 43923), ('\u{13c4}', 43924), ('\u{13c5}', 43925), + ('\u{13c6}', 43926), ('\u{13c7}', 43927), ('\u{13c8}', 43928), ('\u{13c9}', 43929), + ('\u{13ca}', 43930), ('\u{13cb}', 43931), ('\u{13cc}', 43932), ('\u{13cd}', 43933), + ('\u{13ce}', 43934), ('\u{13cf}', 43935), ('\u{13d0}', 43936), ('\u{13d1}', 43937), + ('\u{13d2}', 43938), ('\u{13d3}', 43939), ('\u{13d4}', 43940), ('\u{13d5}', 43941), + ('\u{13d6}', 43942), ('\u{13d7}', 43943), ('\u{13d8}', 43944), ('\u{13d9}', 43945), + ('\u{13da}', 43946), ('\u{13db}', 43947), ('\u{13dc}', 43948), ('\u{13dd}', 43949), + ('\u{13de}', 43950), ('\u{13df}', 43951), ('\u{13e0}', 43952), ('\u{13e1}', 43953), + ('\u{13e2}', 43954), ('\u{13e3}', 43955), ('\u{13e4}', 43956), ('\u{13e5}', 43957), + ('\u{13e6}', 43958), ('\u{13e7}', 43959), ('\u{13e8}', 43960), ('\u{13e9}', 43961), + ('\u{13ea}', 43962), ('\u{13eb}', 43963), ('\u{13ec}', 43964), ('\u{13ed}', 43965), + ('\u{13ee}', 43966), ('\u{13ef}', 43967), ('\u{13f0}', 5112), ('\u{13f1}', 5113), + ('\u{13f2}', 5114), ('\u{13f3}', 5115), ('\u{13f4}', 5116), ('\u{13f5}', 5117), + ('\u{1c89}', 7306), ('\u{1c90}', 4304), ('\u{1c91}', 4305), ('\u{1c92}', 4306), + ('\u{1c93}', 4307), ('\u{1c94}', 4308), ('\u{1c95}', 4309), ('\u{1c96}', 4310), + ('\u{1c97}', 4311), ('\u{1c98}', 4312), ('\u{1c99}', 4313), ('\u{1c9a}', 4314), + ('\u{1c9b}', 4315), ('\u{1c9c}', 4316), ('\u{1c9d}', 4317), ('\u{1c9e}', 4318), + ('\u{1c9f}', 4319), ('\u{1ca0}', 4320), ('\u{1ca1}', 4321), ('\u{1ca2}', 4322), + ('\u{1ca3}', 4323), ('\u{1ca4}', 4324), ('\u{1ca5}', 4325), ('\u{1ca6}', 4326), + ('\u{1ca7}', 4327), ('\u{1ca8}', 4328), ('\u{1ca9}', 4329), ('\u{1caa}', 4330), + ('\u{1cab}', 4331), ('\u{1cac}', 4332), ('\u{1cad}', 4333), ('\u{1cae}', 4334), + ('\u{1caf}', 4335), ('\u{1cb0}', 4336), ('\u{1cb1}', 4337), ('\u{1cb2}', 4338), + ('\u{1cb3}', 4339), ('\u{1cb4}', 4340), ('\u{1cb5}', 4341), ('\u{1cb6}', 4342), + ('\u{1cb7}', 4343), ('\u{1cb8}', 4344), ('\u{1cb9}', 4345), ('\u{1cba}', 4346), + ('\u{1cbd}', 4349), ('\u{1cbe}', 4350), ('\u{1cbf}', 4351), ('\u{1e00}', 7681), + ('\u{1e02}', 7683), ('\u{1e04}', 7685), ('\u{1e06}', 7687), ('\u{1e08}', 7689), + ('\u{1e0a}', 7691), ('\u{1e0c}', 7693), ('\u{1e0e}', 7695), ('\u{1e10}', 7697), + ('\u{1e12}', 7699), ('\u{1e14}', 7701), ('\u{1e16}', 7703), ('\u{1e18}', 7705), + ('\u{1e1a}', 7707), ('\u{1e1c}', 7709), ('\u{1e1e}', 7711), ('\u{1e20}', 7713), + ('\u{1e22}', 7715), ('\u{1e24}', 7717), ('\u{1e26}', 7719), ('\u{1e28}', 7721), + ('\u{1e2a}', 7723), ('\u{1e2c}', 7725), ('\u{1e2e}', 7727), ('\u{1e30}', 7729), + ('\u{1e32}', 7731), ('\u{1e34}', 7733), ('\u{1e36}', 7735), ('\u{1e38}', 7737), + ('\u{1e3a}', 7739), ('\u{1e3c}', 7741), ('\u{1e3e}', 7743), ('\u{1e40}', 7745), + ('\u{1e42}', 7747), ('\u{1e44}', 7749), ('\u{1e46}', 7751), 
('\u{1e48}', 7753), + ('\u{1e4a}', 7755), ('\u{1e4c}', 7757), ('\u{1e4e}', 7759), ('\u{1e50}', 7761), + ('\u{1e52}', 7763), ('\u{1e54}', 7765), ('\u{1e56}', 7767), ('\u{1e58}', 7769), + ('\u{1e5a}', 7771), ('\u{1e5c}', 7773), ('\u{1e5e}', 7775), ('\u{1e60}', 7777), + ('\u{1e62}', 7779), ('\u{1e64}', 7781), ('\u{1e66}', 7783), ('\u{1e68}', 7785), + ('\u{1e6a}', 7787), ('\u{1e6c}', 7789), ('\u{1e6e}', 7791), ('\u{1e70}', 7793), + ('\u{1e72}', 7795), ('\u{1e74}', 7797), ('\u{1e76}', 7799), ('\u{1e78}', 7801), + ('\u{1e7a}', 7803), ('\u{1e7c}', 7805), ('\u{1e7e}', 7807), ('\u{1e80}', 7809), + ('\u{1e82}', 7811), ('\u{1e84}', 7813), ('\u{1e86}', 7815), ('\u{1e88}', 7817), + ('\u{1e8a}', 7819), ('\u{1e8c}', 7821), ('\u{1e8e}', 7823), ('\u{1e90}', 7825), + ('\u{1e92}', 7827), ('\u{1e94}', 7829), ('\u{1e9e}', 223), ('\u{1ea0}', 7841), + ('\u{1ea2}', 7843), ('\u{1ea4}', 7845), ('\u{1ea6}', 7847), ('\u{1ea8}', 7849), + ('\u{1eaa}', 7851), ('\u{1eac}', 7853), ('\u{1eae}', 7855), ('\u{1eb0}', 7857), + ('\u{1eb2}', 7859), ('\u{1eb4}', 7861), ('\u{1eb6}', 7863), ('\u{1eb8}', 7865), + ('\u{1eba}', 7867), ('\u{1ebc}', 7869), ('\u{1ebe}', 7871), ('\u{1ec0}', 7873), + ('\u{1ec2}', 7875), ('\u{1ec4}', 7877), ('\u{1ec6}', 7879), ('\u{1ec8}', 7881), + ('\u{1eca}', 7883), ('\u{1ecc}', 7885), ('\u{1ece}', 7887), ('\u{1ed0}', 7889), + ('\u{1ed2}', 7891), ('\u{1ed4}', 7893), ('\u{1ed6}', 7895), ('\u{1ed8}', 7897), + ('\u{1eda}', 7899), ('\u{1edc}', 7901), ('\u{1ede}', 7903), ('\u{1ee0}', 7905), + ('\u{1ee2}', 7907), ('\u{1ee4}', 7909), ('\u{1ee6}', 7911), ('\u{1ee8}', 7913), + ('\u{1eea}', 7915), ('\u{1eec}', 7917), ('\u{1eee}', 7919), ('\u{1ef0}', 7921), + ('\u{1ef2}', 7923), ('\u{1ef4}', 7925), ('\u{1ef6}', 7927), ('\u{1ef8}', 7929), + ('\u{1efa}', 7931), ('\u{1efc}', 7933), ('\u{1efe}', 7935), ('\u{1f08}', 7936), + ('\u{1f09}', 7937), ('\u{1f0a}', 7938), ('\u{1f0b}', 7939), ('\u{1f0c}', 7940), + ('\u{1f0d}', 7941), ('\u{1f0e}', 7942), ('\u{1f0f}', 7943), ('\u{1f18}', 7952), + ('\u{1f19}', 7953), ('\u{1f1a}', 7954), ('\u{1f1b}', 7955), ('\u{1f1c}', 7956), + ('\u{1f1d}', 7957), ('\u{1f28}', 7968), ('\u{1f29}', 7969), ('\u{1f2a}', 7970), + ('\u{1f2b}', 7971), ('\u{1f2c}', 7972), ('\u{1f2d}', 7973), ('\u{1f2e}', 7974), + ('\u{1f2f}', 7975), ('\u{1f38}', 7984), ('\u{1f39}', 7985), ('\u{1f3a}', 7986), + ('\u{1f3b}', 7987), ('\u{1f3c}', 7988), ('\u{1f3d}', 7989), ('\u{1f3e}', 7990), + ('\u{1f3f}', 7991), ('\u{1f48}', 8000), ('\u{1f49}', 8001), ('\u{1f4a}', 8002), + ('\u{1f4b}', 8003), ('\u{1f4c}', 8004), ('\u{1f4d}', 8005), ('\u{1f59}', 8017), + ('\u{1f5b}', 8019), ('\u{1f5d}', 8021), ('\u{1f5f}', 8023), ('\u{1f68}', 8032), + ('\u{1f69}', 8033), ('\u{1f6a}', 8034), ('\u{1f6b}', 8035), ('\u{1f6c}', 8036), + ('\u{1f6d}', 8037), ('\u{1f6e}', 8038), ('\u{1f6f}', 8039), ('\u{1f88}', 8064), + ('\u{1f89}', 8065), ('\u{1f8a}', 8066), ('\u{1f8b}', 8067), ('\u{1f8c}', 8068), + ('\u{1f8d}', 8069), ('\u{1f8e}', 8070), ('\u{1f8f}', 8071), ('\u{1f98}', 8080), + ('\u{1f99}', 8081), ('\u{1f9a}', 8082), ('\u{1f9b}', 8083), ('\u{1f9c}', 8084), + ('\u{1f9d}', 8085), ('\u{1f9e}', 8086), ('\u{1f9f}', 8087), ('\u{1fa8}', 8096), + ('\u{1fa9}', 8097), ('\u{1faa}', 8098), ('\u{1fab}', 8099), ('\u{1fac}', 8100), + ('\u{1fad}', 8101), ('\u{1fae}', 8102), ('\u{1faf}', 8103), ('\u{1fb8}', 8112), + ('\u{1fb9}', 8113), ('\u{1fba}', 8048), ('\u{1fbb}', 8049), ('\u{1fbc}', 8115), + ('\u{1fc8}', 8050), ('\u{1fc9}', 8051), ('\u{1fca}', 8052), ('\u{1fcb}', 8053), + ('\u{1fcc}', 8131), ('\u{1fd8}', 8144), ('\u{1fd9}', 8145), ('\u{1fda}', 8054), + 
('\u{1fdb}', 8055), ('\u{1fe8}', 8160), ('\u{1fe9}', 8161), ('\u{1fea}', 8058), + ('\u{1feb}', 8059), ('\u{1fec}', 8165), ('\u{1ff8}', 8056), ('\u{1ff9}', 8057), + ('\u{1ffa}', 8060), ('\u{1ffb}', 8061), ('\u{1ffc}', 8179), ('\u{2126}', 969), + ('\u{212a}', 107), ('\u{212b}', 229), ('\u{2132}', 8526), ('\u{2160}', 8560), + ('\u{2161}', 8561), ('\u{2162}', 8562), ('\u{2163}', 8563), ('\u{2164}', 8564), + ('\u{2165}', 8565), ('\u{2166}', 8566), ('\u{2167}', 8567), ('\u{2168}', 8568), + ('\u{2169}', 8569), ('\u{216a}', 8570), ('\u{216b}', 8571), ('\u{216c}', 8572), + ('\u{216d}', 8573), ('\u{216e}', 8574), ('\u{216f}', 8575), ('\u{2183}', 8580), + ('\u{24b6}', 9424), ('\u{24b7}', 9425), ('\u{24b8}', 9426), ('\u{24b9}', 9427), + ('\u{24ba}', 9428), ('\u{24bb}', 9429), ('\u{24bc}', 9430), ('\u{24bd}', 9431), + ('\u{24be}', 9432), ('\u{24bf}', 9433), ('\u{24c0}', 9434), ('\u{24c1}', 9435), + ('\u{24c2}', 9436), ('\u{24c3}', 9437), ('\u{24c4}', 9438), ('\u{24c5}', 9439), + ('\u{24c6}', 9440), ('\u{24c7}', 9441), ('\u{24c8}', 9442), ('\u{24c9}', 9443), + ('\u{24ca}', 9444), ('\u{24cb}', 9445), ('\u{24cc}', 9446), ('\u{24cd}', 9447), + ('\u{24ce}', 9448), ('\u{24cf}', 9449), ('\u{2c00}', 11312), ('\u{2c01}', 11313), + ('\u{2c02}', 11314), ('\u{2c03}', 11315), ('\u{2c04}', 11316), ('\u{2c05}', 11317), + ('\u{2c06}', 11318), ('\u{2c07}', 11319), ('\u{2c08}', 11320), ('\u{2c09}', 11321), + ('\u{2c0a}', 11322), ('\u{2c0b}', 11323), ('\u{2c0c}', 11324), ('\u{2c0d}', 11325), + ('\u{2c0e}', 11326), ('\u{2c0f}', 11327), ('\u{2c10}', 11328), ('\u{2c11}', 11329), + ('\u{2c12}', 11330), ('\u{2c13}', 11331), ('\u{2c14}', 11332), ('\u{2c15}', 11333), + ('\u{2c16}', 11334), ('\u{2c17}', 11335), ('\u{2c18}', 11336), ('\u{2c19}', 11337), + ('\u{2c1a}', 11338), ('\u{2c1b}', 11339), ('\u{2c1c}', 11340), ('\u{2c1d}', 11341), + ('\u{2c1e}', 11342), ('\u{2c1f}', 11343), ('\u{2c20}', 11344), ('\u{2c21}', 11345), + ('\u{2c22}', 11346), ('\u{2c23}', 11347), ('\u{2c24}', 11348), ('\u{2c25}', 11349), + ('\u{2c26}', 11350), ('\u{2c27}', 11351), ('\u{2c28}', 11352), ('\u{2c29}', 11353), + ('\u{2c2a}', 11354), ('\u{2c2b}', 11355), ('\u{2c2c}', 11356), ('\u{2c2d}', 11357), + ('\u{2c2e}', 11358), ('\u{2c2f}', 11359), ('\u{2c60}', 11361), ('\u{2c62}', 619), + ('\u{2c63}', 7549), ('\u{2c64}', 637), ('\u{2c67}', 11368), ('\u{2c69}', 11370), + ('\u{2c6b}', 11372), ('\u{2c6d}', 593), ('\u{2c6e}', 625), ('\u{2c6f}', 592), + ('\u{2c70}', 594), ('\u{2c72}', 11379), ('\u{2c75}', 11382), ('\u{2c7e}', 575), + ('\u{2c7f}', 576), ('\u{2c80}', 11393), ('\u{2c82}', 11395), ('\u{2c84}', 11397), + ('\u{2c86}', 11399), ('\u{2c88}', 11401), ('\u{2c8a}', 11403), ('\u{2c8c}', 11405), + ('\u{2c8e}', 11407), ('\u{2c90}', 11409), ('\u{2c92}', 11411), ('\u{2c94}', 11413), + ('\u{2c96}', 11415), ('\u{2c98}', 11417), ('\u{2c9a}', 11419), ('\u{2c9c}', 11421), + ('\u{2c9e}', 11423), ('\u{2ca0}', 11425), ('\u{2ca2}', 11427), ('\u{2ca4}', 11429), + ('\u{2ca6}', 11431), ('\u{2ca8}', 11433), ('\u{2caa}', 11435), ('\u{2cac}', 11437), + ('\u{2cae}', 11439), ('\u{2cb0}', 11441), ('\u{2cb2}', 11443), ('\u{2cb4}', 11445), + ('\u{2cb6}', 11447), ('\u{2cb8}', 11449), ('\u{2cba}', 11451), ('\u{2cbc}', 11453), + ('\u{2cbe}', 11455), ('\u{2cc0}', 11457), ('\u{2cc2}', 11459), ('\u{2cc4}', 11461), + ('\u{2cc6}', 11463), ('\u{2cc8}', 11465), ('\u{2cca}', 11467), ('\u{2ccc}', 11469), + ('\u{2cce}', 11471), ('\u{2cd0}', 11473), ('\u{2cd2}', 11475), ('\u{2cd4}', 11477), + ('\u{2cd6}', 11479), ('\u{2cd8}', 11481), ('\u{2cda}', 11483), ('\u{2cdc}', 11485), + ('\u{2cde}', 11487), 
('\u{2ce0}', 11489), ('\u{2ce2}', 11491), ('\u{2ceb}', 11500), + ('\u{2ced}', 11502), ('\u{2cf2}', 11507), ('\u{a640}', 42561), ('\u{a642}', 42563), + ('\u{a644}', 42565), ('\u{a646}', 42567), ('\u{a648}', 42569), ('\u{a64a}', 42571), + ('\u{a64c}', 42573), ('\u{a64e}', 42575), ('\u{a650}', 42577), ('\u{a652}', 42579), + ('\u{a654}', 42581), ('\u{a656}', 42583), ('\u{a658}', 42585), ('\u{a65a}', 42587), + ('\u{a65c}', 42589), ('\u{a65e}', 42591), ('\u{a660}', 42593), ('\u{a662}', 42595), + ('\u{a664}', 42597), ('\u{a666}', 42599), ('\u{a668}', 42601), ('\u{a66a}', 42603), + ('\u{a66c}', 42605), ('\u{a680}', 42625), ('\u{a682}', 42627), ('\u{a684}', 42629), + ('\u{a686}', 42631), ('\u{a688}', 42633), ('\u{a68a}', 42635), ('\u{a68c}', 42637), + ('\u{a68e}', 42639), ('\u{a690}', 42641), ('\u{a692}', 42643), ('\u{a694}', 42645), + ('\u{a696}', 42647), ('\u{a698}', 42649), ('\u{a69a}', 42651), ('\u{a722}', 42787), + ('\u{a724}', 42789), ('\u{a726}', 42791), ('\u{a728}', 42793), ('\u{a72a}', 42795), + ('\u{a72c}', 42797), ('\u{a72e}', 42799), ('\u{a732}', 42803), ('\u{a734}', 42805), + ('\u{a736}', 42807), ('\u{a738}', 42809), ('\u{a73a}', 42811), ('\u{a73c}', 42813), + ('\u{a73e}', 42815), ('\u{a740}', 42817), ('\u{a742}', 42819), ('\u{a744}', 42821), + ('\u{a746}', 42823), ('\u{a748}', 42825), ('\u{a74a}', 42827), ('\u{a74c}', 42829), + ('\u{a74e}', 42831), ('\u{a750}', 42833), ('\u{a752}', 42835), ('\u{a754}', 42837), + ('\u{a756}', 42839), ('\u{a758}', 42841), ('\u{a75a}', 42843), ('\u{a75c}', 42845), + ('\u{a75e}', 42847), ('\u{a760}', 42849), ('\u{a762}', 42851), ('\u{a764}', 42853), + ('\u{a766}', 42855), ('\u{a768}', 42857), ('\u{a76a}', 42859), ('\u{a76c}', 42861), + ('\u{a76e}', 42863), ('\u{a779}', 42874), ('\u{a77b}', 42876), ('\u{a77d}', 7545), + ('\u{a77e}', 42879), ('\u{a780}', 42881), ('\u{a782}', 42883), ('\u{a784}', 42885), + ('\u{a786}', 42887), ('\u{a78b}', 42892), ('\u{a78d}', 613), ('\u{a790}', 42897), + ('\u{a792}', 42899), ('\u{a796}', 42903), ('\u{a798}', 42905), ('\u{a79a}', 42907), + ('\u{a79c}', 42909), ('\u{a79e}', 42911), ('\u{a7a0}', 42913), ('\u{a7a2}', 42915), + ('\u{a7a4}', 42917), ('\u{a7a6}', 42919), ('\u{a7a8}', 42921), ('\u{a7aa}', 614), + ('\u{a7ab}', 604), ('\u{a7ac}', 609), ('\u{a7ad}', 620), ('\u{a7ae}', 618), + ('\u{a7b0}', 670), ('\u{a7b1}', 647), ('\u{a7b2}', 669), ('\u{a7b3}', 43859), + ('\u{a7b4}', 42933), ('\u{a7b6}', 42935), ('\u{a7b8}', 42937), ('\u{a7ba}', 42939), + ('\u{a7bc}', 42941), ('\u{a7be}', 42943), ('\u{a7c0}', 42945), ('\u{a7c2}', 42947), + ('\u{a7c4}', 42900), ('\u{a7c5}', 642), ('\u{a7c6}', 7566), ('\u{a7c7}', 42952), + ('\u{a7c9}', 42954), ('\u{a7cb}', 612), ('\u{a7cc}', 42957), ('\u{a7ce}', 42959), + ('\u{a7d0}', 42961), ('\u{a7d2}', 42963), ('\u{a7d4}', 42965), ('\u{a7d6}', 42967), + ('\u{a7d8}', 42969), ('\u{a7da}', 42971), ('\u{a7dc}', 411), ('\u{a7f5}', 42998), + ('\u{ff21}', 65345), ('\u{ff22}', 65346), ('\u{ff23}', 65347), ('\u{ff24}', 65348), + ('\u{ff25}', 65349), ('\u{ff26}', 65350), ('\u{ff27}', 65351), ('\u{ff28}', 65352), + ('\u{ff29}', 65353), ('\u{ff2a}', 65354), ('\u{ff2b}', 65355), ('\u{ff2c}', 65356), + ('\u{ff2d}', 65357), ('\u{ff2e}', 65358), ('\u{ff2f}', 65359), ('\u{ff30}', 65360), + ('\u{ff31}', 65361), ('\u{ff32}', 65362), ('\u{ff33}', 65363), ('\u{ff34}', 65364), + ('\u{ff35}', 65365), ('\u{ff36}', 65366), ('\u{ff37}', 65367), ('\u{ff38}', 65368), + ('\u{ff39}', 65369), ('\u{ff3a}', 65370), ('\u{10400}', 66600), ('\u{10401}', 66601), + ('\u{10402}', 66602), ('\u{10403}', 66603), ('\u{10404}', 66604), 
('\u{10405}', 66605), + ('\u{10406}', 66606), ('\u{10407}', 66607), ('\u{10408}', 66608), ('\u{10409}', 66609), + ('\u{1040a}', 66610), ('\u{1040b}', 66611), ('\u{1040c}', 66612), ('\u{1040d}', 66613), + ('\u{1040e}', 66614), ('\u{1040f}', 66615), ('\u{10410}', 66616), ('\u{10411}', 66617), + ('\u{10412}', 66618), ('\u{10413}', 66619), ('\u{10414}', 66620), ('\u{10415}', 66621), + ('\u{10416}', 66622), ('\u{10417}', 66623), ('\u{10418}', 66624), ('\u{10419}', 66625), + ('\u{1041a}', 66626), ('\u{1041b}', 66627), ('\u{1041c}', 66628), ('\u{1041d}', 66629), + ('\u{1041e}', 66630), ('\u{1041f}', 66631), ('\u{10420}', 66632), ('\u{10421}', 66633), + ('\u{10422}', 66634), ('\u{10423}', 66635), ('\u{10424}', 66636), ('\u{10425}', 66637), + ('\u{10426}', 66638), ('\u{10427}', 66639), ('\u{104b0}', 66776), ('\u{104b1}', 66777), + ('\u{104b2}', 66778), ('\u{104b3}', 66779), ('\u{104b4}', 66780), ('\u{104b5}', 66781), + ('\u{104b6}', 66782), ('\u{104b7}', 66783), ('\u{104b8}', 66784), ('\u{104b9}', 66785), + ('\u{104ba}', 66786), ('\u{104bb}', 66787), ('\u{104bc}', 66788), ('\u{104bd}', 66789), + ('\u{104be}', 66790), ('\u{104bf}', 66791), ('\u{104c0}', 66792), ('\u{104c1}', 66793), + ('\u{104c2}', 66794), ('\u{104c3}', 66795), ('\u{104c4}', 66796), ('\u{104c5}', 66797), + ('\u{104c6}', 66798), ('\u{104c7}', 66799), ('\u{104c8}', 66800), ('\u{104c9}', 66801), + ('\u{104ca}', 66802), ('\u{104cb}', 66803), ('\u{104cc}', 66804), ('\u{104cd}', 66805), + ('\u{104ce}', 66806), ('\u{104cf}', 66807), ('\u{104d0}', 66808), ('\u{104d1}', 66809), + ('\u{104d2}', 66810), ('\u{104d3}', 66811), ('\u{10570}', 66967), ('\u{10571}', 66968), + ('\u{10572}', 66969), ('\u{10573}', 66970), ('\u{10574}', 66971), ('\u{10575}', 66972), + ('\u{10576}', 66973), ('\u{10577}', 66974), ('\u{10578}', 66975), ('\u{10579}', 66976), + ('\u{1057a}', 66977), ('\u{1057c}', 66979), ('\u{1057d}', 66980), ('\u{1057e}', 66981), + ('\u{1057f}', 66982), ('\u{10580}', 66983), ('\u{10581}', 66984), ('\u{10582}', 66985), + ('\u{10583}', 66986), ('\u{10584}', 66987), ('\u{10585}', 66988), ('\u{10586}', 66989), + ('\u{10587}', 66990), ('\u{10588}', 66991), ('\u{10589}', 66992), ('\u{1058a}', 66993), + ('\u{1058c}', 66995), ('\u{1058d}', 66996), ('\u{1058e}', 66997), ('\u{1058f}', 66998), + ('\u{10590}', 66999), ('\u{10591}', 67000), ('\u{10592}', 67001), ('\u{10594}', 67003), + ('\u{10595}', 67004), ('\u{10c80}', 68800), ('\u{10c81}', 68801), ('\u{10c82}', 68802), + ('\u{10c83}', 68803), ('\u{10c84}', 68804), ('\u{10c85}', 68805), ('\u{10c86}', 68806), + ('\u{10c87}', 68807), ('\u{10c88}', 68808), ('\u{10c89}', 68809), ('\u{10c8a}', 68810), + ('\u{10c8b}', 68811), ('\u{10c8c}', 68812), ('\u{10c8d}', 68813), ('\u{10c8e}', 68814), + ('\u{10c8f}', 68815), ('\u{10c90}', 68816), ('\u{10c91}', 68817), ('\u{10c92}', 68818), + ('\u{10c93}', 68819), ('\u{10c94}', 68820), ('\u{10c95}', 68821), ('\u{10c96}', 68822), + ('\u{10c97}', 68823), ('\u{10c98}', 68824), ('\u{10c99}', 68825), ('\u{10c9a}', 68826), + ('\u{10c9b}', 68827), ('\u{10c9c}', 68828), ('\u{10c9d}', 68829), ('\u{10c9e}', 68830), + ('\u{10c9f}', 68831), ('\u{10ca0}', 68832), ('\u{10ca1}', 68833), ('\u{10ca2}', 68834), + ('\u{10ca3}', 68835), ('\u{10ca4}', 68836), ('\u{10ca5}', 68837), ('\u{10ca6}', 68838), + ('\u{10ca7}', 68839), ('\u{10ca8}', 68840), ('\u{10ca9}', 68841), ('\u{10caa}', 68842), + ('\u{10cab}', 68843), ('\u{10cac}', 68844), ('\u{10cad}', 68845), ('\u{10cae}', 68846), + ('\u{10caf}', 68847), ('\u{10cb0}', 68848), ('\u{10cb1}', 68849), ('\u{10cb2}', 68850), + ('\u{10d50}', 
68976), ('\u{10d51}', 68977), ('\u{10d52}', 68978), ('\u{10d53}', 68979), + ('\u{10d54}', 68980), ('\u{10d55}', 68981), ('\u{10d56}', 68982), ('\u{10d57}', 68983), + ('\u{10d58}', 68984), ('\u{10d59}', 68985), ('\u{10d5a}', 68986), ('\u{10d5b}', 68987), + ('\u{10d5c}', 68988), ('\u{10d5d}', 68989), ('\u{10d5e}', 68990), ('\u{10d5f}', 68991), + ('\u{10d60}', 68992), ('\u{10d61}', 68993), ('\u{10d62}', 68994), ('\u{10d63}', 68995), + ('\u{10d64}', 68996), ('\u{10d65}', 68997), ('\u{118a0}', 71872), ('\u{118a1}', 71873), + ('\u{118a2}', 71874), ('\u{118a3}', 71875), ('\u{118a4}', 71876), ('\u{118a5}', 71877), + ('\u{118a6}', 71878), ('\u{118a7}', 71879), ('\u{118a8}', 71880), ('\u{118a9}', 71881), + ('\u{118aa}', 71882), ('\u{118ab}', 71883), ('\u{118ac}', 71884), ('\u{118ad}', 71885), + ('\u{118ae}', 71886), ('\u{118af}', 71887), ('\u{118b0}', 71888), ('\u{118b1}', 71889), + ('\u{118b2}', 71890), ('\u{118b3}', 71891), ('\u{118b4}', 71892), ('\u{118b5}', 71893), + ('\u{118b6}', 71894), ('\u{118b7}', 71895), ('\u{118b8}', 71896), ('\u{118b9}', 71897), + ('\u{118ba}', 71898), ('\u{118bb}', 71899), ('\u{118bc}', 71900), ('\u{118bd}', 71901), + ('\u{118be}', 71902), ('\u{118bf}', 71903), ('\u{16e40}', 93792), ('\u{16e41}', 93793), + ('\u{16e42}', 93794), ('\u{16e43}', 93795), ('\u{16e44}', 93796), ('\u{16e45}', 93797), + ('\u{16e46}', 93798), ('\u{16e47}', 93799), ('\u{16e48}', 93800), ('\u{16e49}', 93801), + ('\u{16e4a}', 93802), ('\u{16e4b}', 93803), ('\u{16e4c}', 93804), ('\u{16e4d}', 93805), + ('\u{16e4e}', 93806), ('\u{16e4f}', 93807), ('\u{16e50}', 93808), ('\u{16e51}', 93809), + ('\u{16e52}', 93810), ('\u{16e53}', 93811), ('\u{16e54}', 93812), ('\u{16e55}', 93813), + ('\u{16e56}', 93814), ('\u{16e57}', 93815), ('\u{16e58}', 93816), ('\u{16e59}', 93817), + ('\u{16e5a}', 93818), ('\u{16e5b}', 93819), ('\u{16e5c}', 93820), ('\u{16e5d}', 93821), + ('\u{16e5e}', 93822), ('\u{16e5f}', 93823), ('\u{16ea0}', 93883), ('\u{16ea1}', 93884), + ('\u{16ea2}', 93885), ('\u{16ea3}', 93886), ('\u{16ea4}', 93887), ('\u{16ea5}', 93888), + ('\u{16ea6}', 93889), ('\u{16ea7}', 93890), ('\u{16ea8}', 93891), ('\u{16ea9}', 93892), + ('\u{16eaa}', 93893), ('\u{16eab}', 93894), ('\u{16eac}', 93895), ('\u{16ead}', 93896), + ('\u{16eae}', 93897), ('\u{16eaf}', 93898), ('\u{16eb0}', 93899), ('\u{16eb1}', 93900), + ('\u{16eb2}', 93901), ('\u{16eb3}', 93902), ('\u{16eb4}', 93903), ('\u{16eb5}', 93904), + ('\u{16eb6}', 93905), ('\u{16eb7}', 93906), ('\u{16eb8}', 93907), ('\u{1e900}', 125218), + ('\u{1e901}', 125219), ('\u{1e902}', 125220), ('\u{1e903}', 125221), ('\u{1e904}', 125222), + ('\u{1e905}', 125223), ('\u{1e906}', 125224), ('\u{1e907}', 125225), ('\u{1e908}', 125226), + ('\u{1e909}', 125227), ('\u{1e90a}', 125228), ('\u{1e90b}', 125229), ('\u{1e90c}', 125230), + ('\u{1e90d}', 125231), ('\u{1e90e}', 125232), ('\u{1e90f}', 125233), ('\u{1e910}', 125234), + ('\u{1e911}', 125235), ('\u{1e912}', 125236), ('\u{1e913}', 125237), ('\u{1e914}', 125238), + ('\u{1e915}', 125239), ('\u{1e916}', 125240), ('\u{1e917}', 125241), ('\u{1e918}', 125242), + ('\u{1e919}', 125243), ('\u{1e91a}', 125244), ('\u{1e91b}', 125245), ('\u{1e91c}', 125246), + ('\u{1e91d}', 125247), ('\u{1e91e}', 125248), ('\u{1e91f}', 125249), ('\u{1e920}', 125250), + ('\u{1e921}', 125251), + ]; + + static LOWERCASE_TABLE_MULTI: &[[char; 3]; 1] = &[ + ['i', '\u{307}', '\u{0}'], + ]; + static UPPERCASE_TABLE: &[(char, u32); 1554] = &[ - ('\u{b5}', 0x39c), ('\u{df}', 0x400000), ('\u{e0}', 0xc0), ('\u{e1}', 0xc1), - ('\u{e2}', 0xc2), ('\u{e3}', 0xc3), 
('\u{e4}', 0xc4), ('\u{e5}', 0xc5), ('\u{e6}', 0xc6), - ('\u{e7}', 0xc7), ('\u{e8}', 0xc8), ('\u{e9}', 0xc9), ('\u{ea}', 0xca), ('\u{eb}', 0xcb), - ('\u{ec}', 0xcc), ('\u{ed}', 0xcd), ('\u{ee}', 0xce), ('\u{ef}', 0xcf), ('\u{f0}', 0xd0), - ('\u{f1}', 0xd1), ('\u{f2}', 0xd2), ('\u{f3}', 0xd3), ('\u{f4}', 0xd4), ('\u{f5}', 0xd5), - ('\u{f6}', 0xd6), ('\u{f8}', 0xd8), ('\u{f9}', 0xd9), ('\u{fa}', 0xda), ('\u{fb}', 0xdb), - ('\u{fc}', 0xdc), ('\u{fd}', 0xdd), ('\u{fe}', 0xde), ('\u{ff}', 0x178), ('\u{101}', 0x100), - ('\u{103}', 0x102), ('\u{105}', 0x104), ('\u{107}', 0x106), ('\u{109}', 0x108), - ('\u{10b}', 0x10a), ('\u{10d}', 0x10c), ('\u{10f}', 0x10e), ('\u{111}', 0x110), - ('\u{113}', 0x112), ('\u{115}', 0x114), ('\u{117}', 0x116), ('\u{119}', 0x118), - ('\u{11b}', 0x11a), ('\u{11d}', 0x11c), ('\u{11f}', 0x11e), ('\u{121}', 0x120), - ('\u{123}', 0x122), ('\u{125}', 0x124), ('\u{127}', 0x126), ('\u{129}', 0x128), - ('\u{12b}', 0x12a), ('\u{12d}', 0x12c), ('\u{12f}', 0x12e), ('\u{131}', 0x49), - ('\u{133}', 0x132), ('\u{135}', 0x134), ('\u{137}', 0x136), ('\u{13a}', 0x139), - ('\u{13c}', 0x13b), ('\u{13e}', 0x13d), ('\u{140}', 0x13f), ('\u{142}', 0x141), - ('\u{144}', 0x143), ('\u{146}', 0x145), ('\u{148}', 0x147), ('\u{149}', 0x400001), - ('\u{14b}', 0x14a), ('\u{14d}', 0x14c), ('\u{14f}', 0x14e), ('\u{151}', 0x150), - ('\u{153}', 0x152), ('\u{155}', 0x154), ('\u{157}', 0x156), ('\u{159}', 0x158), - ('\u{15b}', 0x15a), ('\u{15d}', 0x15c), ('\u{15f}', 0x15e), ('\u{161}', 0x160), - ('\u{163}', 0x162), ('\u{165}', 0x164), ('\u{167}', 0x166), ('\u{169}', 0x168), - ('\u{16b}', 0x16a), ('\u{16d}', 0x16c), ('\u{16f}', 0x16e), ('\u{171}', 0x170), - ('\u{173}', 0x172), ('\u{175}', 0x174), ('\u{177}', 0x176), ('\u{17a}', 0x179), - ('\u{17c}', 0x17b), ('\u{17e}', 0x17d), ('\u{17f}', 0x53), ('\u{180}', 0x243), - ('\u{183}', 0x182), ('\u{185}', 0x184), ('\u{188}', 0x187), ('\u{18c}', 0x18b), - ('\u{192}', 0x191), ('\u{195}', 0x1f6), ('\u{199}', 0x198), ('\u{19a}', 0x23d), - ('\u{19b}', 0xa7dc), ('\u{19e}', 0x220), ('\u{1a1}', 0x1a0), ('\u{1a3}', 0x1a2), - ('\u{1a5}', 0x1a4), ('\u{1a8}', 0x1a7), ('\u{1ad}', 0x1ac), ('\u{1b0}', 0x1af), - ('\u{1b4}', 0x1b3), ('\u{1b6}', 0x1b5), ('\u{1b9}', 0x1b8), ('\u{1bd}', 0x1bc), - ('\u{1bf}', 0x1f7), ('\u{1c5}', 0x1c4), ('\u{1c6}', 0x1c4), ('\u{1c8}', 0x1c7), - ('\u{1c9}', 0x1c7), ('\u{1cb}', 0x1ca), ('\u{1cc}', 0x1ca), ('\u{1ce}', 0x1cd), - ('\u{1d0}', 0x1cf), ('\u{1d2}', 0x1d1), ('\u{1d4}', 0x1d3), ('\u{1d6}', 0x1d5), - ('\u{1d8}', 0x1d7), ('\u{1da}', 0x1d9), ('\u{1dc}', 0x1db), ('\u{1dd}', 0x18e), - ('\u{1df}', 0x1de), ('\u{1e1}', 0x1e0), ('\u{1e3}', 0x1e2), ('\u{1e5}', 0x1e4), - ('\u{1e7}', 0x1e6), ('\u{1e9}', 0x1e8), ('\u{1eb}', 0x1ea), ('\u{1ed}', 0x1ec), - ('\u{1ef}', 0x1ee), ('\u{1f0}', 0x400002), ('\u{1f2}', 0x1f1), ('\u{1f3}', 0x1f1), - ('\u{1f5}', 0x1f4), ('\u{1f9}', 0x1f8), ('\u{1fb}', 0x1fa), ('\u{1fd}', 0x1fc), - ('\u{1ff}', 0x1fe), ('\u{201}', 0x200), ('\u{203}', 0x202), ('\u{205}', 0x204), - ('\u{207}', 0x206), ('\u{209}', 0x208), ('\u{20b}', 0x20a), ('\u{20d}', 0x20c), - ('\u{20f}', 0x20e), ('\u{211}', 0x210), ('\u{213}', 0x212), ('\u{215}', 0x214), - ('\u{217}', 0x216), ('\u{219}', 0x218), ('\u{21b}', 0x21a), ('\u{21d}', 0x21c), - ('\u{21f}', 0x21e), ('\u{223}', 0x222), ('\u{225}', 0x224), ('\u{227}', 0x226), - ('\u{229}', 0x228), ('\u{22b}', 0x22a), ('\u{22d}', 0x22c), ('\u{22f}', 0x22e), - ('\u{231}', 0x230), ('\u{233}', 0x232), ('\u{23c}', 0x23b), ('\u{23f}', 0x2c7e), - ('\u{240}', 0x2c7f), ('\u{242}', 0x241), ('\u{247}', 0x246), ('\u{249}', 
0x248), - ('\u{24b}', 0x24a), ('\u{24d}', 0x24c), ('\u{24f}', 0x24e), ('\u{250}', 0x2c6f), - ('\u{251}', 0x2c6d), ('\u{252}', 0x2c70), ('\u{253}', 0x181), ('\u{254}', 0x186), - ('\u{256}', 0x189), ('\u{257}', 0x18a), ('\u{259}', 0x18f), ('\u{25b}', 0x190), - ('\u{25c}', 0xa7ab), ('\u{260}', 0x193), ('\u{261}', 0xa7ac), ('\u{263}', 0x194), - ('\u{264}', 0xa7cb), ('\u{265}', 0xa78d), ('\u{266}', 0xa7aa), ('\u{268}', 0x197), - ('\u{269}', 0x196), ('\u{26a}', 0xa7ae), ('\u{26b}', 0x2c62), ('\u{26c}', 0xa7ad), - ('\u{26f}', 0x19c), ('\u{271}', 0x2c6e), ('\u{272}', 0x19d), ('\u{275}', 0x19f), - ('\u{27d}', 0x2c64), ('\u{280}', 0x1a6), ('\u{282}', 0xa7c5), ('\u{283}', 0x1a9), - ('\u{287}', 0xa7b1), ('\u{288}', 0x1ae), ('\u{289}', 0x244), ('\u{28a}', 0x1b1), - ('\u{28b}', 0x1b2), ('\u{28c}', 0x245), ('\u{292}', 0x1b7), ('\u{29d}', 0xa7b2), - ('\u{29e}', 0xa7b0), ('\u{345}', 0x399), ('\u{371}', 0x370), ('\u{373}', 0x372), - ('\u{377}', 0x376), ('\u{37b}', 0x3fd), ('\u{37c}', 0x3fe), ('\u{37d}', 0x3ff), - ('\u{390}', 0x400003), ('\u{3ac}', 0x386), ('\u{3ad}', 0x388), ('\u{3ae}', 0x389), - ('\u{3af}', 0x38a), ('\u{3b0}', 0x400004), ('\u{3b1}', 0x391), ('\u{3b2}', 0x392), - ('\u{3b3}', 0x393), ('\u{3b4}', 0x394), ('\u{3b5}', 0x395), ('\u{3b6}', 0x396), - ('\u{3b7}', 0x397), ('\u{3b8}', 0x398), ('\u{3b9}', 0x399), ('\u{3ba}', 0x39a), - ('\u{3bb}', 0x39b), ('\u{3bc}', 0x39c), ('\u{3bd}', 0x39d), ('\u{3be}', 0x39e), - ('\u{3bf}', 0x39f), ('\u{3c0}', 0x3a0), ('\u{3c1}', 0x3a1), ('\u{3c2}', 0x3a3), - ('\u{3c3}', 0x3a3), ('\u{3c4}', 0x3a4), ('\u{3c5}', 0x3a5), ('\u{3c6}', 0x3a6), - ('\u{3c7}', 0x3a7), ('\u{3c8}', 0x3a8), ('\u{3c9}', 0x3a9), ('\u{3ca}', 0x3aa), - ('\u{3cb}', 0x3ab), ('\u{3cc}', 0x38c), ('\u{3cd}', 0x38e), ('\u{3ce}', 0x38f), - ('\u{3d0}', 0x392), ('\u{3d1}', 0x398), ('\u{3d5}', 0x3a6), ('\u{3d6}', 0x3a0), - ('\u{3d7}', 0x3cf), ('\u{3d9}', 0x3d8), ('\u{3db}', 0x3da), ('\u{3dd}', 0x3dc), - ('\u{3df}', 0x3de), ('\u{3e1}', 0x3e0), ('\u{3e3}', 0x3e2), ('\u{3e5}', 0x3e4), - ('\u{3e7}', 0x3e6), ('\u{3e9}', 0x3e8), ('\u{3eb}', 0x3ea), ('\u{3ed}', 0x3ec), - ('\u{3ef}', 0x3ee), ('\u{3f0}', 0x39a), ('\u{3f1}', 0x3a1), ('\u{3f2}', 0x3f9), - ('\u{3f3}', 0x37f), ('\u{3f5}', 0x395), ('\u{3f8}', 0x3f7), ('\u{3fb}', 0x3fa), - ('\u{430}', 0x410), ('\u{431}', 0x411), ('\u{432}', 0x412), ('\u{433}', 0x413), - ('\u{434}', 0x414), ('\u{435}', 0x415), ('\u{436}', 0x416), ('\u{437}', 0x417), - ('\u{438}', 0x418), ('\u{439}', 0x419), ('\u{43a}', 0x41a), ('\u{43b}', 0x41b), - ('\u{43c}', 0x41c), ('\u{43d}', 0x41d), ('\u{43e}', 0x41e), ('\u{43f}', 0x41f), - ('\u{440}', 0x420), ('\u{441}', 0x421), ('\u{442}', 0x422), ('\u{443}', 0x423), - ('\u{444}', 0x424), ('\u{445}', 0x425), ('\u{446}', 0x426), ('\u{447}', 0x427), - ('\u{448}', 0x428), ('\u{449}', 0x429), ('\u{44a}', 0x42a), ('\u{44b}', 0x42b), - ('\u{44c}', 0x42c), ('\u{44d}', 0x42d), ('\u{44e}', 0x42e), ('\u{44f}', 0x42f), - ('\u{450}', 0x400), ('\u{451}', 0x401), ('\u{452}', 0x402), ('\u{453}', 0x403), - ('\u{454}', 0x404), ('\u{455}', 0x405), ('\u{456}', 0x406), ('\u{457}', 0x407), - ('\u{458}', 0x408), ('\u{459}', 0x409), ('\u{45a}', 0x40a), ('\u{45b}', 0x40b), - ('\u{45c}', 0x40c), ('\u{45d}', 0x40d), ('\u{45e}', 0x40e), ('\u{45f}', 0x40f), - ('\u{461}', 0x460), ('\u{463}', 0x462), ('\u{465}', 0x464), ('\u{467}', 0x466), - ('\u{469}', 0x468), ('\u{46b}', 0x46a), ('\u{46d}', 0x46c), ('\u{46f}', 0x46e), - ('\u{471}', 0x470), ('\u{473}', 0x472), ('\u{475}', 0x474), ('\u{477}', 0x476), - ('\u{479}', 0x478), ('\u{47b}', 0x47a), ('\u{47d}', 0x47c), ('\u{47f}', 
0x47e), - ('\u{481}', 0x480), ('\u{48b}', 0x48a), ('\u{48d}', 0x48c), ('\u{48f}', 0x48e), - ('\u{491}', 0x490), ('\u{493}', 0x492), ('\u{495}', 0x494), ('\u{497}', 0x496), - ('\u{499}', 0x498), ('\u{49b}', 0x49a), ('\u{49d}', 0x49c), ('\u{49f}', 0x49e), - ('\u{4a1}', 0x4a0), ('\u{4a3}', 0x4a2), ('\u{4a5}', 0x4a4), ('\u{4a7}', 0x4a6), - ('\u{4a9}', 0x4a8), ('\u{4ab}', 0x4aa), ('\u{4ad}', 0x4ac), ('\u{4af}', 0x4ae), - ('\u{4b1}', 0x4b0), ('\u{4b3}', 0x4b2), ('\u{4b5}', 0x4b4), ('\u{4b7}', 0x4b6), - ('\u{4b9}', 0x4b8), ('\u{4bb}', 0x4ba), ('\u{4bd}', 0x4bc), ('\u{4bf}', 0x4be), - ('\u{4c2}', 0x4c1), ('\u{4c4}', 0x4c3), ('\u{4c6}', 0x4c5), ('\u{4c8}', 0x4c7), - ('\u{4ca}', 0x4c9), ('\u{4cc}', 0x4cb), ('\u{4ce}', 0x4cd), ('\u{4cf}', 0x4c0), - ('\u{4d1}', 0x4d0), ('\u{4d3}', 0x4d2), ('\u{4d5}', 0x4d4), ('\u{4d7}', 0x4d6), - ('\u{4d9}', 0x4d8), ('\u{4db}', 0x4da), ('\u{4dd}', 0x4dc), ('\u{4df}', 0x4de), - ('\u{4e1}', 0x4e0), ('\u{4e3}', 0x4e2), ('\u{4e5}', 0x4e4), ('\u{4e7}', 0x4e6), - ('\u{4e9}', 0x4e8), ('\u{4eb}', 0x4ea), ('\u{4ed}', 0x4ec), ('\u{4ef}', 0x4ee), - ('\u{4f1}', 0x4f0), ('\u{4f3}', 0x4f2), ('\u{4f5}', 0x4f4), ('\u{4f7}', 0x4f6), - ('\u{4f9}', 0x4f8), ('\u{4fb}', 0x4fa), ('\u{4fd}', 0x4fc), ('\u{4ff}', 0x4fe), - ('\u{501}', 0x500), ('\u{503}', 0x502), ('\u{505}', 0x504), ('\u{507}', 0x506), - ('\u{509}', 0x508), ('\u{50b}', 0x50a), ('\u{50d}', 0x50c), ('\u{50f}', 0x50e), - ('\u{511}', 0x510), ('\u{513}', 0x512), ('\u{515}', 0x514), ('\u{517}', 0x516), - ('\u{519}', 0x518), ('\u{51b}', 0x51a), ('\u{51d}', 0x51c), ('\u{51f}', 0x51e), - ('\u{521}', 0x520), ('\u{523}', 0x522), ('\u{525}', 0x524), ('\u{527}', 0x526), - ('\u{529}', 0x528), ('\u{52b}', 0x52a), ('\u{52d}', 0x52c), ('\u{52f}', 0x52e), - ('\u{561}', 0x531), ('\u{562}', 0x532), ('\u{563}', 0x533), ('\u{564}', 0x534), - ('\u{565}', 0x535), ('\u{566}', 0x536), ('\u{567}', 0x537), ('\u{568}', 0x538), - ('\u{569}', 0x539), ('\u{56a}', 0x53a), ('\u{56b}', 0x53b), ('\u{56c}', 0x53c), - ('\u{56d}', 0x53d), ('\u{56e}', 0x53e), ('\u{56f}', 0x53f), ('\u{570}', 0x540), - ('\u{571}', 0x541), ('\u{572}', 0x542), ('\u{573}', 0x543), ('\u{574}', 0x544), - ('\u{575}', 0x545), ('\u{576}', 0x546), ('\u{577}', 0x547), ('\u{578}', 0x548), - ('\u{579}', 0x549), ('\u{57a}', 0x54a), ('\u{57b}', 0x54b), ('\u{57c}', 0x54c), - ('\u{57d}', 0x54d), ('\u{57e}', 0x54e), ('\u{57f}', 0x54f), ('\u{580}', 0x550), - ('\u{581}', 0x551), ('\u{582}', 0x552), ('\u{583}', 0x553), ('\u{584}', 0x554), - ('\u{585}', 0x555), ('\u{586}', 0x556), ('\u{587}', 0x400005), ('\u{10d0}', 0x1c90), - ('\u{10d1}', 0x1c91), ('\u{10d2}', 0x1c92), ('\u{10d3}', 0x1c93), ('\u{10d4}', 0x1c94), - ('\u{10d5}', 0x1c95), ('\u{10d6}', 0x1c96), ('\u{10d7}', 0x1c97), ('\u{10d8}', 0x1c98), - ('\u{10d9}', 0x1c99), ('\u{10da}', 0x1c9a), ('\u{10db}', 0x1c9b), ('\u{10dc}', 0x1c9c), - ('\u{10dd}', 0x1c9d), ('\u{10de}', 0x1c9e), ('\u{10df}', 0x1c9f), ('\u{10e0}', 0x1ca0), - ('\u{10e1}', 0x1ca1), ('\u{10e2}', 0x1ca2), ('\u{10e3}', 0x1ca3), ('\u{10e4}', 0x1ca4), - ('\u{10e5}', 0x1ca5), ('\u{10e6}', 0x1ca6), ('\u{10e7}', 0x1ca7), ('\u{10e8}', 0x1ca8), - ('\u{10e9}', 0x1ca9), ('\u{10ea}', 0x1caa), ('\u{10eb}', 0x1cab), ('\u{10ec}', 0x1cac), - ('\u{10ed}', 0x1cad), ('\u{10ee}', 0x1cae), ('\u{10ef}', 0x1caf), ('\u{10f0}', 0x1cb0), - ('\u{10f1}', 0x1cb1), ('\u{10f2}', 0x1cb2), ('\u{10f3}', 0x1cb3), ('\u{10f4}', 0x1cb4), - ('\u{10f5}', 0x1cb5), ('\u{10f6}', 0x1cb6), ('\u{10f7}', 0x1cb7), ('\u{10f8}', 0x1cb8), - ('\u{10f9}', 0x1cb9), ('\u{10fa}', 0x1cba), ('\u{10fd}', 0x1cbd), ('\u{10fe}', 0x1cbe), - 
('\u{10ff}', 0x1cbf), ('\u{13f8}', 0x13f0), ('\u{13f9}', 0x13f1), ('\u{13fa}', 0x13f2), - ('\u{13fb}', 0x13f3), ('\u{13fc}', 0x13f4), ('\u{13fd}', 0x13f5), ('\u{1c80}', 0x412), - ('\u{1c81}', 0x414), ('\u{1c82}', 0x41e), ('\u{1c83}', 0x421), ('\u{1c84}', 0x422), - ('\u{1c85}', 0x422), ('\u{1c86}', 0x42a), ('\u{1c87}', 0x462), ('\u{1c88}', 0xa64a), - ('\u{1c8a}', 0x1c89), ('\u{1d79}', 0xa77d), ('\u{1d7d}', 0x2c63), ('\u{1d8e}', 0xa7c6), - ('\u{1e01}', 0x1e00), ('\u{1e03}', 0x1e02), ('\u{1e05}', 0x1e04), ('\u{1e07}', 0x1e06), - ('\u{1e09}', 0x1e08), ('\u{1e0b}', 0x1e0a), ('\u{1e0d}', 0x1e0c), ('\u{1e0f}', 0x1e0e), - ('\u{1e11}', 0x1e10), ('\u{1e13}', 0x1e12), ('\u{1e15}', 0x1e14), ('\u{1e17}', 0x1e16), - ('\u{1e19}', 0x1e18), ('\u{1e1b}', 0x1e1a), ('\u{1e1d}', 0x1e1c), ('\u{1e1f}', 0x1e1e), - ('\u{1e21}', 0x1e20), ('\u{1e23}', 0x1e22), ('\u{1e25}', 0x1e24), ('\u{1e27}', 0x1e26), - ('\u{1e29}', 0x1e28), ('\u{1e2b}', 0x1e2a), ('\u{1e2d}', 0x1e2c), ('\u{1e2f}', 0x1e2e), - ('\u{1e31}', 0x1e30), ('\u{1e33}', 0x1e32), ('\u{1e35}', 0x1e34), ('\u{1e37}', 0x1e36), - ('\u{1e39}', 0x1e38), ('\u{1e3b}', 0x1e3a), ('\u{1e3d}', 0x1e3c), ('\u{1e3f}', 0x1e3e), - ('\u{1e41}', 0x1e40), ('\u{1e43}', 0x1e42), ('\u{1e45}', 0x1e44), ('\u{1e47}', 0x1e46), - ('\u{1e49}', 0x1e48), ('\u{1e4b}', 0x1e4a), ('\u{1e4d}', 0x1e4c), ('\u{1e4f}', 0x1e4e), - ('\u{1e51}', 0x1e50), ('\u{1e53}', 0x1e52), ('\u{1e55}', 0x1e54), ('\u{1e57}', 0x1e56), - ('\u{1e59}', 0x1e58), ('\u{1e5b}', 0x1e5a), ('\u{1e5d}', 0x1e5c), ('\u{1e5f}', 0x1e5e), - ('\u{1e61}', 0x1e60), ('\u{1e63}', 0x1e62), ('\u{1e65}', 0x1e64), ('\u{1e67}', 0x1e66), - ('\u{1e69}', 0x1e68), ('\u{1e6b}', 0x1e6a), ('\u{1e6d}', 0x1e6c), ('\u{1e6f}', 0x1e6e), - ('\u{1e71}', 0x1e70), ('\u{1e73}', 0x1e72), ('\u{1e75}', 0x1e74), ('\u{1e77}', 0x1e76), - ('\u{1e79}', 0x1e78), ('\u{1e7b}', 0x1e7a), ('\u{1e7d}', 0x1e7c), ('\u{1e7f}', 0x1e7e), - ('\u{1e81}', 0x1e80), ('\u{1e83}', 0x1e82), ('\u{1e85}', 0x1e84), ('\u{1e87}', 0x1e86), - ('\u{1e89}', 0x1e88), ('\u{1e8b}', 0x1e8a), ('\u{1e8d}', 0x1e8c), ('\u{1e8f}', 0x1e8e), - ('\u{1e91}', 0x1e90), ('\u{1e93}', 0x1e92), ('\u{1e95}', 0x1e94), ('\u{1e96}', 0x400006), - ('\u{1e97}', 0x400007), ('\u{1e98}', 0x400008), ('\u{1e99}', 0x400009), - ('\u{1e9a}', 0x40000a), ('\u{1e9b}', 0x1e60), ('\u{1ea1}', 0x1ea0), ('\u{1ea3}', 0x1ea2), - ('\u{1ea5}', 0x1ea4), ('\u{1ea7}', 0x1ea6), ('\u{1ea9}', 0x1ea8), ('\u{1eab}', 0x1eaa), - ('\u{1ead}', 0x1eac), ('\u{1eaf}', 0x1eae), ('\u{1eb1}', 0x1eb0), ('\u{1eb3}', 0x1eb2), - ('\u{1eb5}', 0x1eb4), ('\u{1eb7}', 0x1eb6), ('\u{1eb9}', 0x1eb8), ('\u{1ebb}', 0x1eba), - ('\u{1ebd}', 0x1ebc), ('\u{1ebf}', 0x1ebe), ('\u{1ec1}', 0x1ec0), ('\u{1ec3}', 0x1ec2), - ('\u{1ec5}', 0x1ec4), ('\u{1ec7}', 0x1ec6), ('\u{1ec9}', 0x1ec8), ('\u{1ecb}', 0x1eca), - ('\u{1ecd}', 0x1ecc), ('\u{1ecf}', 0x1ece), ('\u{1ed1}', 0x1ed0), ('\u{1ed3}', 0x1ed2), - ('\u{1ed5}', 0x1ed4), ('\u{1ed7}', 0x1ed6), ('\u{1ed9}', 0x1ed8), ('\u{1edb}', 0x1eda), - ('\u{1edd}', 0x1edc), ('\u{1edf}', 0x1ede), ('\u{1ee1}', 0x1ee0), ('\u{1ee3}', 0x1ee2), - ('\u{1ee5}', 0x1ee4), ('\u{1ee7}', 0x1ee6), ('\u{1ee9}', 0x1ee8), ('\u{1eeb}', 0x1eea), - ('\u{1eed}', 0x1eec), ('\u{1eef}', 0x1eee), ('\u{1ef1}', 0x1ef0), ('\u{1ef3}', 0x1ef2), - ('\u{1ef5}', 0x1ef4), ('\u{1ef7}', 0x1ef6), ('\u{1ef9}', 0x1ef8), ('\u{1efb}', 0x1efa), - ('\u{1efd}', 0x1efc), ('\u{1eff}', 0x1efe), ('\u{1f00}', 0x1f08), ('\u{1f01}', 0x1f09), - ('\u{1f02}', 0x1f0a), ('\u{1f03}', 0x1f0b), ('\u{1f04}', 0x1f0c), ('\u{1f05}', 0x1f0d), - ('\u{1f06}', 0x1f0e), ('\u{1f07}', 0x1f0f), ('\u{1f10}', 
0x1f18), ('\u{1f11}', 0x1f19), - ('\u{1f12}', 0x1f1a), ('\u{1f13}', 0x1f1b), ('\u{1f14}', 0x1f1c), ('\u{1f15}', 0x1f1d), - ('\u{1f20}', 0x1f28), ('\u{1f21}', 0x1f29), ('\u{1f22}', 0x1f2a), ('\u{1f23}', 0x1f2b), - ('\u{1f24}', 0x1f2c), ('\u{1f25}', 0x1f2d), ('\u{1f26}', 0x1f2e), ('\u{1f27}', 0x1f2f), - ('\u{1f30}', 0x1f38), ('\u{1f31}', 0x1f39), ('\u{1f32}', 0x1f3a), ('\u{1f33}', 0x1f3b), - ('\u{1f34}', 0x1f3c), ('\u{1f35}', 0x1f3d), ('\u{1f36}', 0x1f3e), ('\u{1f37}', 0x1f3f), - ('\u{1f40}', 0x1f48), ('\u{1f41}', 0x1f49), ('\u{1f42}', 0x1f4a), ('\u{1f43}', 0x1f4b), - ('\u{1f44}', 0x1f4c), ('\u{1f45}', 0x1f4d), ('\u{1f50}', 0x40000b), ('\u{1f51}', 0x1f59), - ('\u{1f52}', 0x40000c), ('\u{1f53}', 0x1f5b), ('\u{1f54}', 0x40000d), ('\u{1f55}', 0x1f5d), - ('\u{1f56}', 0x40000e), ('\u{1f57}', 0x1f5f), ('\u{1f60}', 0x1f68), ('\u{1f61}', 0x1f69), - ('\u{1f62}', 0x1f6a), ('\u{1f63}', 0x1f6b), ('\u{1f64}', 0x1f6c), ('\u{1f65}', 0x1f6d), - ('\u{1f66}', 0x1f6e), ('\u{1f67}', 0x1f6f), ('\u{1f70}', 0x1fba), ('\u{1f71}', 0x1fbb), - ('\u{1f72}', 0x1fc8), ('\u{1f73}', 0x1fc9), ('\u{1f74}', 0x1fca), ('\u{1f75}', 0x1fcb), - ('\u{1f76}', 0x1fda), ('\u{1f77}', 0x1fdb), ('\u{1f78}', 0x1ff8), ('\u{1f79}', 0x1ff9), - ('\u{1f7a}', 0x1fea), ('\u{1f7b}', 0x1feb), ('\u{1f7c}', 0x1ffa), ('\u{1f7d}', 0x1ffb), - ('\u{1f80}', 0x40000f), ('\u{1f81}', 0x400010), ('\u{1f82}', 0x400011), - ('\u{1f83}', 0x400012), ('\u{1f84}', 0x400013), ('\u{1f85}', 0x400014), - ('\u{1f86}', 0x400015), ('\u{1f87}', 0x400016), ('\u{1f88}', 0x400017), - ('\u{1f89}', 0x400018), ('\u{1f8a}', 0x400019), ('\u{1f8b}', 0x40001a), - ('\u{1f8c}', 0x40001b), ('\u{1f8d}', 0x40001c), ('\u{1f8e}', 0x40001d), - ('\u{1f8f}', 0x40001e), ('\u{1f90}', 0x40001f), ('\u{1f91}', 0x400020), - ('\u{1f92}', 0x400021), ('\u{1f93}', 0x400022), ('\u{1f94}', 0x400023), - ('\u{1f95}', 0x400024), ('\u{1f96}', 0x400025), ('\u{1f97}', 0x400026), - ('\u{1f98}', 0x400027), ('\u{1f99}', 0x400028), ('\u{1f9a}', 0x400029), - ('\u{1f9b}', 0x40002a), ('\u{1f9c}', 0x40002b), ('\u{1f9d}', 0x40002c), - ('\u{1f9e}', 0x40002d), ('\u{1f9f}', 0x40002e), ('\u{1fa0}', 0x40002f), - ('\u{1fa1}', 0x400030), ('\u{1fa2}', 0x400031), ('\u{1fa3}', 0x400032), - ('\u{1fa4}', 0x400033), ('\u{1fa5}', 0x400034), ('\u{1fa6}', 0x400035), - ('\u{1fa7}', 0x400036), ('\u{1fa8}', 0x400037), ('\u{1fa9}', 0x400038), - ('\u{1faa}', 0x400039), ('\u{1fab}', 0x40003a), ('\u{1fac}', 0x40003b), - ('\u{1fad}', 0x40003c), ('\u{1fae}', 0x40003d), ('\u{1faf}', 0x40003e), - ('\u{1fb0}', 0x1fb8), ('\u{1fb1}', 0x1fb9), ('\u{1fb2}', 0x40003f), ('\u{1fb3}', 0x400040), - ('\u{1fb4}', 0x400041), ('\u{1fb6}', 0x400042), ('\u{1fb7}', 0x400043), - ('\u{1fbc}', 0x400044), ('\u{1fbe}', 0x399), ('\u{1fc2}', 0x400045), ('\u{1fc3}', 0x400046), - ('\u{1fc4}', 0x400047), ('\u{1fc6}', 0x400048), ('\u{1fc7}', 0x400049), - ('\u{1fcc}', 0x40004a), ('\u{1fd0}', 0x1fd8), ('\u{1fd1}', 0x1fd9), ('\u{1fd2}', 0x40004b), - ('\u{1fd3}', 0x40004c), ('\u{1fd6}', 0x40004d), ('\u{1fd7}', 0x40004e), - ('\u{1fe0}', 0x1fe8), ('\u{1fe1}', 0x1fe9), ('\u{1fe2}', 0x40004f), ('\u{1fe3}', 0x400050), - ('\u{1fe4}', 0x400051), ('\u{1fe5}', 0x1fec), ('\u{1fe6}', 0x400052), - ('\u{1fe7}', 0x400053), ('\u{1ff2}', 0x400054), ('\u{1ff3}', 0x400055), - ('\u{1ff4}', 0x400056), ('\u{1ff6}', 0x400057), ('\u{1ff7}', 0x400058), - ('\u{1ffc}', 0x400059), ('\u{214e}', 0x2132), ('\u{2170}', 0x2160), ('\u{2171}', 0x2161), - ('\u{2172}', 0x2162), ('\u{2173}', 0x2163), ('\u{2174}', 0x2164), ('\u{2175}', 0x2165), - ('\u{2176}', 0x2166), ('\u{2177}', 0x2167), ('\u{2178}', 0x2168), 
('\u{2179}', 0x2169), - ('\u{217a}', 0x216a), ('\u{217b}', 0x216b), ('\u{217c}', 0x216c), ('\u{217d}', 0x216d), - ('\u{217e}', 0x216e), ('\u{217f}', 0x216f), ('\u{2184}', 0x2183), ('\u{24d0}', 0x24b6), - ('\u{24d1}', 0x24b7), ('\u{24d2}', 0x24b8), ('\u{24d3}', 0x24b9), ('\u{24d4}', 0x24ba), - ('\u{24d5}', 0x24bb), ('\u{24d6}', 0x24bc), ('\u{24d7}', 0x24bd), ('\u{24d8}', 0x24be), - ('\u{24d9}', 0x24bf), ('\u{24da}', 0x24c0), ('\u{24db}', 0x24c1), ('\u{24dc}', 0x24c2), - ('\u{24dd}', 0x24c3), ('\u{24de}', 0x24c4), ('\u{24df}', 0x24c5), ('\u{24e0}', 0x24c6), - ('\u{24e1}', 0x24c7), ('\u{24e2}', 0x24c8), ('\u{24e3}', 0x24c9), ('\u{24e4}', 0x24ca), - ('\u{24e5}', 0x24cb), ('\u{24e6}', 0x24cc), ('\u{24e7}', 0x24cd), ('\u{24e8}', 0x24ce), - ('\u{24e9}', 0x24cf), ('\u{2c30}', 0x2c00), ('\u{2c31}', 0x2c01), ('\u{2c32}', 0x2c02), - ('\u{2c33}', 0x2c03), ('\u{2c34}', 0x2c04), ('\u{2c35}', 0x2c05), ('\u{2c36}', 0x2c06), - ('\u{2c37}', 0x2c07), ('\u{2c38}', 0x2c08), ('\u{2c39}', 0x2c09), ('\u{2c3a}', 0x2c0a), - ('\u{2c3b}', 0x2c0b), ('\u{2c3c}', 0x2c0c), ('\u{2c3d}', 0x2c0d), ('\u{2c3e}', 0x2c0e), - ('\u{2c3f}', 0x2c0f), ('\u{2c40}', 0x2c10), ('\u{2c41}', 0x2c11), ('\u{2c42}', 0x2c12), - ('\u{2c43}', 0x2c13), ('\u{2c44}', 0x2c14), ('\u{2c45}', 0x2c15), ('\u{2c46}', 0x2c16), - ('\u{2c47}', 0x2c17), ('\u{2c48}', 0x2c18), ('\u{2c49}', 0x2c19), ('\u{2c4a}', 0x2c1a), - ('\u{2c4b}', 0x2c1b), ('\u{2c4c}', 0x2c1c), ('\u{2c4d}', 0x2c1d), ('\u{2c4e}', 0x2c1e), - ('\u{2c4f}', 0x2c1f), ('\u{2c50}', 0x2c20), ('\u{2c51}', 0x2c21), ('\u{2c52}', 0x2c22), - ('\u{2c53}', 0x2c23), ('\u{2c54}', 0x2c24), ('\u{2c55}', 0x2c25), ('\u{2c56}', 0x2c26), - ('\u{2c57}', 0x2c27), ('\u{2c58}', 0x2c28), ('\u{2c59}', 0x2c29), ('\u{2c5a}', 0x2c2a), - ('\u{2c5b}', 0x2c2b), ('\u{2c5c}', 0x2c2c), ('\u{2c5d}', 0x2c2d), ('\u{2c5e}', 0x2c2e), - ('\u{2c5f}', 0x2c2f), ('\u{2c61}', 0x2c60), ('\u{2c65}', 0x23a), ('\u{2c66}', 0x23e), - ('\u{2c68}', 0x2c67), ('\u{2c6a}', 0x2c69), ('\u{2c6c}', 0x2c6b), ('\u{2c73}', 0x2c72), - ('\u{2c76}', 0x2c75), ('\u{2c81}', 0x2c80), ('\u{2c83}', 0x2c82), ('\u{2c85}', 0x2c84), - ('\u{2c87}', 0x2c86), ('\u{2c89}', 0x2c88), ('\u{2c8b}', 0x2c8a), ('\u{2c8d}', 0x2c8c), - ('\u{2c8f}', 0x2c8e), ('\u{2c91}', 0x2c90), ('\u{2c93}', 0x2c92), ('\u{2c95}', 0x2c94), - ('\u{2c97}', 0x2c96), ('\u{2c99}', 0x2c98), ('\u{2c9b}', 0x2c9a), ('\u{2c9d}', 0x2c9c), - ('\u{2c9f}', 0x2c9e), ('\u{2ca1}', 0x2ca0), ('\u{2ca3}', 0x2ca2), ('\u{2ca5}', 0x2ca4), - ('\u{2ca7}', 0x2ca6), ('\u{2ca9}', 0x2ca8), ('\u{2cab}', 0x2caa), ('\u{2cad}', 0x2cac), - ('\u{2caf}', 0x2cae), ('\u{2cb1}', 0x2cb0), ('\u{2cb3}', 0x2cb2), ('\u{2cb5}', 0x2cb4), - ('\u{2cb7}', 0x2cb6), ('\u{2cb9}', 0x2cb8), ('\u{2cbb}', 0x2cba), ('\u{2cbd}', 0x2cbc), - ('\u{2cbf}', 0x2cbe), ('\u{2cc1}', 0x2cc0), ('\u{2cc3}', 0x2cc2), ('\u{2cc5}', 0x2cc4), - ('\u{2cc7}', 0x2cc6), ('\u{2cc9}', 0x2cc8), ('\u{2ccb}', 0x2cca), ('\u{2ccd}', 0x2ccc), - ('\u{2ccf}', 0x2cce), ('\u{2cd1}', 0x2cd0), ('\u{2cd3}', 0x2cd2), ('\u{2cd5}', 0x2cd4), - ('\u{2cd7}', 0x2cd6), ('\u{2cd9}', 0x2cd8), ('\u{2cdb}', 0x2cda), ('\u{2cdd}', 0x2cdc), - ('\u{2cdf}', 0x2cde), ('\u{2ce1}', 0x2ce0), ('\u{2ce3}', 0x2ce2), ('\u{2cec}', 0x2ceb), - ('\u{2cee}', 0x2ced), ('\u{2cf3}', 0x2cf2), ('\u{2d00}', 0x10a0), ('\u{2d01}', 0x10a1), - ('\u{2d02}', 0x10a2), ('\u{2d03}', 0x10a3), ('\u{2d04}', 0x10a4), ('\u{2d05}', 0x10a5), - ('\u{2d06}', 0x10a6), ('\u{2d07}', 0x10a7), ('\u{2d08}', 0x10a8), ('\u{2d09}', 0x10a9), - ('\u{2d0a}', 0x10aa), ('\u{2d0b}', 0x10ab), ('\u{2d0c}', 0x10ac), ('\u{2d0d}', 0x10ad), - ('\u{2d0e}', 0x10ae), 
('\u{2d0f}', 0x10af), ('\u{2d10}', 0x10b0), ('\u{2d11}', 0x10b1), - ('\u{2d12}', 0x10b2), ('\u{2d13}', 0x10b3), ('\u{2d14}', 0x10b4), ('\u{2d15}', 0x10b5), - ('\u{2d16}', 0x10b6), ('\u{2d17}', 0x10b7), ('\u{2d18}', 0x10b8), ('\u{2d19}', 0x10b9), - ('\u{2d1a}', 0x10ba), ('\u{2d1b}', 0x10bb), ('\u{2d1c}', 0x10bc), ('\u{2d1d}', 0x10bd), - ('\u{2d1e}', 0x10be), ('\u{2d1f}', 0x10bf), ('\u{2d20}', 0x10c0), ('\u{2d21}', 0x10c1), - ('\u{2d22}', 0x10c2), ('\u{2d23}', 0x10c3), ('\u{2d24}', 0x10c4), ('\u{2d25}', 0x10c5), - ('\u{2d27}', 0x10c7), ('\u{2d2d}', 0x10cd), ('\u{a641}', 0xa640), ('\u{a643}', 0xa642), - ('\u{a645}', 0xa644), ('\u{a647}', 0xa646), ('\u{a649}', 0xa648), ('\u{a64b}', 0xa64a), - ('\u{a64d}', 0xa64c), ('\u{a64f}', 0xa64e), ('\u{a651}', 0xa650), ('\u{a653}', 0xa652), - ('\u{a655}', 0xa654), ('\u{a657}', 0xa656), ('\u{a659}', 0xa658), ('\u{a65b}', 0xa65a), - ('\u{a65d}', 0xa65c), ('\u{a65f}', 0xa65e), ('\u{a661}', 0xa660), ('\u{a663}', 0xa662), - ('\u{a665}', 0xa664), ('\u{a667}', 0xa666), ('\u{a669}', 0xa668), ('\u{a66b}', 0xa66a), - ('\u{a66d}', 0xa66c), ('\u{a681}', 0xa680), ('\u{a683}', 0xa682), ('\u{a685}', 0xa684), - ('\u{a687}', 0xa686), ('\u{a689}', 0xa688), ('\u{a68b}', 0xa68a), ('\u{a68d}', 0xa68c), - ('\u{a68f}', 0xa68e), ('\u{a691}', 0xa690), ('\u{a693}', 0xa692), ('\u{a695}', 0xa694), - ('\u{a697}', 0xa696), ('\u{a699}', 0xa698), ('\u{a69b}', 0xa69a), ('\u{a723}', 0xa722), - ('\u{a725}', 0xa724), ('\u{a727}', 0xa726), ('\u{a729}', 0xa728), ('\u{a72b}', 0xa72a), - ('\u{a72d}', 0xa72c), ('\u{a72f}', 0xa72e), ('\u{a733}', 0xa732), ('\u{a735}', 0xa734), - ('\u{a737}', 0xa736), ('\u{a739}', 0xa738), ('\u{a73b}', 0xa73a), ('\u{a73d}', 0xa73c), - ('\u{a73f}', 0xa73e), ('\u{a741}', 0xa740), ('\u{a743}', 0xa742), ('\u{a745}', 0xa744), - ('\u{a747}', 0xa746), ('\u{a749}', 0xa748), ('\u{a74b}', 0xa74a), ('\u{a74d}', 0xa74c), - ('\u{a74f}', 0xa74e), ('\u{a751}', 0xa750), ('\u{a753}', 0xa752), ('\u{a755}', 0xa754), - ('\u{a757}', 0xa756), ('\u{a759}', 0xa758), ('\u{a75b}', 0xa75a), ('\u{a75d}', 0xa75c), - ('\u{a75f}', 0xa75e), ('\u{a761}', 0xa760), ('\u{a763}', 0xa762), ('\u{a765}', 0xa764), - ('\u{a767}', 0xa766), ('\u{a769}', 0xa768), ('\u{a76b}', 0xa76a), ('\u{a76d}', 0xa76c), - ('\u{a76f}', 0xa76e), ('\u{a77a}', 0xa779), ('\u{a77c}', 0xa77b), ('\u{a77f}', 0xa77e), - ('\u{a781}', 0xa780), ('\u{a783}', 0xa782), ('\u{a785}', 0xa784), ('\u{a787}', 0xa786), - ('\u{a78c}', 0xa78b), ('\u{a791}', 0xa790), ('\u{a793}', 0xa792), ('\u{a794}', 0xa7c4), - ('\u{a797}', 0xa796), ('\u{a799}', 0xa798), ('\u{a79b}', 0xa79a), ('\u{a79d}', 0xa79c), - ('\u{a79f}', 0xa79e), ('\u{a7a1}', 0xa7a0), ('\u{a7a3}', 0xa7a2), ('\u{a7a5}', 0xa7a4), - ('\u{a7a7}', 0xa7a6), ('\u{a7a9}', 0xa7a8), ('\u{a7b5}', 0xa7b4), ('\u{a7b7}', 0xa7b6), - ('\u{a7b9}', 0xa7b8), ('\u{a7bb}', 0xa7ba), ('\u{a7bd}', 0xa7bc), ('\u{a7bf}', 0xa7be), - ('\u{a7c1}', 0xa7c0), ('\u{a7c3}', 0xa7c2), ('\u{a7c8}', 0xa7c7), ('\u{a7ca}', 0xa7c9), - ('\u{a7cd}', 0xa7cc), ('\u{a7cf}', 0xa7ce), ('\u{a7d1}', 0xa7d0), ('\u{a7d3}', 0xa7d2), - ('\u{a7d5}', 0xa7d4), ('\u{a7d7}', 0xa7d6), ('\u{a7d9}', 0xa7d8), ('\u{a7db}', 0xa7da), - ('\u{a7f6}', 0xa7f5), ('\u{ab53}', 0xa7b3), ('\u{ab70}', 0x13a0), ('\u{ab71}', 0x13a1), - ('\u{ab72}', 0x13a2), ('\u{ab73}', 0x13a3), ('\u{ab74}', 0x13a4), ('\u{ab75}', 0x13a5), - ('\u{ab76}', 0x13a6), ('\u{ab77}', 0x13a7), ('\u{ab78}', 0x13a8), ('\u{ab79}', 0x13a9), - ('\u{ab7a}', 0x13aa), ('\u{ab7b}', 0x13ab), ('\u{ab7c}', 0x13ac), ('\u{ab7d}', 0x13ad), - ('\u{ab7e}', 0x13ae), ('\u{ab7f}', 0x13af), ('\u{ab80}', 0x13b0), 
('\u{ab81}', 0x13b1), - ('\u{ab82}', 0x13b2), ('\u{ab83}', 0x13b3), ('\u{ab84}', 0x13b4), ('\u{ab85}', 0x13b5), - ('\u{ab86}', 0x13b6), ('\u{ab87}', 0x13b7), ('\u{ab88}', 0x13b8), ('\u{ab89}', 0x13b9), - ('\u{ab8a}', 0x13ba), ('\u{ab8b}', 0x13bb), ('\u{ab8c}', 0x13bc), ('\u{ab8d}', 0x13bd), - ('\u{ab8e}', 0x13be), ('\u{ab8f}', 0x13bf), ('\u{ab90}', 0x13c0), ('\u{ab91}', 0x13c1), - ('\u{ab92}', 0x13c2), ('\u{ab93}', 0x13c3), ('\u{ab94}', 0x13c4), ('\u{ab95}', 0x13c5), - ('\u{ab96}', 0x13c6), ('\u{ab97}', 0x13c7), ('\u{ab98}', 0x13c8), ('\u{ab99}', 0x13c9), - ('\u{ab9a}', 0x13ca), ('\u{ab9b}', 0x13cb), ('\u{ab9c}', 0x13cc), ('\u{ab9d}', 0x13cd), - ('\u{ab9e}', 0x13ce), ('\u{ab9f}', 0x13cf), ('\u{aba0}', 0x13d0), ('\u{aba1}', 0x13d1), - ('\u{aba2}', 0x13d2), ('\u{aba3}', 0x13d3), ('\u{aba4}', 0x13d4), ('\u{aba5}', 0x13d5), - ('\u{aba6}', 0x13d6), ('\u{aba7}', 0x13d7), ('\u{aba8}', 0x13d8), ('\u{aba9}', 0x13d9), - ('\u{abaa}', 0x13da), ('\u{abab}', 0x13db), ('\u{abac}', 0x13dc), ('\u{abad}', 0x13dd), - ('\u{abae}', 0x13de), ('\u{abaf}', 0x13df), ('\u{abb0}', 0x13e0), ('\u{abb1}', 0x13e1), - ('\u{abb2}', 0x13e2), ('\u{abb3}', 0x13e3), ('\u{abb4}', 0x13e4), ('\u{abb5}', 0x13e5), - ('\u{abb6}', 0x13e6), ('\u{abb7}', 0x13e7), ('\u{abb8}', 0x13e8), ('\u{abb9}', 0x13e9), - ('\u{abba}', 0x13ea), ('\u{abbb}', 0x13eb), ('\u{abbc}', 0x13ec), ('\u{abbd}', 0x13ed), - ('\u{abbe}', 0x13ee), ('\u{abbf}', 0x13ef), ('\u{fb00}', 0x40005a), ('\u{fb01}', 0x40005b), - ('\u{fb02}', 0x40005c), ('\u{fb03}', 0x40005d), ('\u{fb04}', 0x40005e), - ('\u{fb05}', 0x40005f), ('\u{fb06}', 0x400060), ('\u{fb13}', 0x400061), - ('\u{fb14}', 0x400062), ('\u{fb15}', 0x400063), ('\u{fb16}', 0x400064), - ('\u{fb17}', 0x400065), ('\u{ff41}', 0xff21), ('\u{ff42}', 0xff22), ('\u{ff43}', 0xff23), - ('\u{ff44}', 0xff24), ('\u{ff45}', 0xff25), ('\u{ff46}', 0xff26), ('\u{ff47}', 0xff27), - ('\u{ff48}', 0xff28), ('\u{ff49}', 0xff29), ('\u{ff4a}', 0xff2a), ('\u{ff4b}', 0xff2b), - ('\u{ff4c}', 0xff2c), ('\u{ff4d}', 0xff2d), ('\u{ff4e}', 0xff2e), ('\u{ff4f}', 0xff2f), - ('\u{ff50}', 0xff30), ('\u{ff51}', 0xff31), ('\u{ff52}', 0xff32), ('\u{ff53}', 0xff33), - ('\u{ff54}', 0xff34), ('\u{ff55}', 0xff35), ('\u{ff56}', 0xff36), ('\u{ff57}', 0xff37), - ('\u{ff58}', 0xff38), ('\u{ff59}', 0xff39), ('\u{ff5a}', 0xff3a), ('\u{10428}', 0x10400), - ('\u{10429}', 0x10401), ('\u{1042a}', 0x10402), ('\u{1042b}', 0x10403), - ('\u{1042c}', 0x10404), ('\u{1042d}', 0x10405), ('\u{1042e}', 0x10406), - ('\u{1042f}', 0x10407), ('\u{10430}', 0x10408), ('\u{10431}', 0x10409), - ('\u{10432}', 0x1040a), ('\u{10433}', 0x1040b), ('\u{10434}', 0x1040c), - ('\u{10435}', 0x1040d), ('\u{10436}', 0x1040e), ('\u{10437}', 0x1040f), - ('\u{10438}', 0x10410), ('\u{10439}', 0x10411), ('\u{1043a}', 0x10412), - ('\u{1043b}', 0x10413), ('\u{1043c}', 0x10414), ('\u{1043d}', 0x10415), - ('\u{1043e}', 0x10416), ('\u{1043f}', 0x10417), ('\u{10440}', 0x10418), - ('\u{10441}', 0x10419), ('\u{10442}', 0x1041a), ('\u{10443}', 0x1041b), - ('\u{10444}', 0x1041c), ('\u{10445}', 0x1041d), ('\u{10446}', 0x1041e), - ('\u{10447}', 0x1041f), ('\u{10448}', 0x10420), ('\u{10449}', 0x10421), - ('\u{1044a}', 0x10422), ('\u{1044b}', 0x10423), ('\u{1044c}', 0x10424), - ('\u{1044d}', 0x10425), ('\u{1044e}', 0x10426), ('\u{1044f}', 0x10427), - ('\u{104d8}', 0x104b0), ('\u{104d9}', 0x104b1), ('\u{104da}', 0x104b2), - ('\u{104db}', 0x104b3), ('\u{104dc}', 0x104b4), ('\u{104dd}', 0x104b5), - ('\u{104de}', 0x104b6), ('\u{104df}', 0x104b7), ('\u{104e0}', 0x104b8), - ('\u{104e1}', 0x104b9), ('\u{104e2}', 
0x104ba), ('\u{104e3}', 0x104bb), - ('\u{104e4}', 0x104bc), ('\u{104e5}', 0x104bd), ('\u{104e6}', 0x104be), - ('\u{104e7}', 0x104bf), ('\u{104e8}', 0x104c0), ('\u{104e9}', 0x104c1), - ('\u{104ea}', 0x104c2), ('\u{104eb}', 0x104c3), ('\u{104ec}', 0x104c4), - ('\u{104ed}', 0x104c5), ('\u{104ee}', 0x104c6), ('\u{104ef}', 0x104c7), - ('\u{104f0}', 0x104c8), ('\u{104f1}', 0x104c9), ('\u{104f2}', 0x104ca), - ('\u{104f3}', 0x104cb), ('\u{104f4}', 0x104cc), ('\u{104f5}', 0x104cd), - ('\u{104f6}', 0x104ce), ('\u{104f7}', 0x104cf), ('\u{104f8}', 0x104d0), - ('\u{104f9}', 0x104d1), ('\u{104fa}', 0x104d2), ('\u{104fb}', 0x104d3), - ('\u{10597}', 0x10570), ('\u{10598}', 0x10571), ('\u{10599}', 0x10572), - ('\u{1059a}', 0x10573), ('\u{1059b}', 0x10574), ('\u{1059c}', 0x10575), - ('\u{1059d}', 0x10576), ('\u{1059e}', 0x10577), ('\u{1059f}', 0x10578), - ('\u{105a0}', 0x10579), ('\u{105a1}', 0x1057a), ('\u{105a3}', 0x1057c), - ('\u{105a4}', 0x1057d), ('\u{105a5}', 0x1057e), ('\u{105a6}', 0x1057f), - ('\u{105a7}', 0x10580), ('\u{105a8}', 0x10581), ('\u{105a9}', 0x10582), - ('\u{105aa}', 0x10583), ('\u{105ab}', 0x10584), ('\u{105ac}', 0x10585), - ('\u{105ad}', 0x10586), ('\u{105ae}', 0x10587), ('\u{105af}', 0x10588), - ('\u{105b0}', 0x10589), ('\u{105b1}', 0x1058a), ('\u{105b3}', 0x1058c), - ('\u{105b4}', 0x1058d), ('\u{105b5}', 0x1058e), ('\u{105b6}', 0x1058f), - ('\u{105b7}', 0x10590), ('\u{105b8}', 0x10591), ('\u{105b9}', 0x10592), - ('\u{105bb}', 0x10594), ('\u{105bc}', 0x10595), ('\u{10cc0}', 0x10c80), - ('\u{10cc1}', 0x10c81), ('\u{10cc2}', 0x10c82), ('\u{10cc3}', 0x10c83), - ('\u{10cc4}', 0x10c84), ('\u{10cc5}', 0x10c85), ('\u{10cc6}', 0x10c86), - ('\u{10cc7}', 0x10c87), ('\u{10cc8}', 0x10c88), ('\u{10cc9}', 0x10c89), - ('\u{10cca}', 0x10c8a), ('\u{10ccb}', 0x10c8b), ('\u{10ccc}', 0x10c8c), - ('\u{10ccd}', 0x10c8d), ('\u{10cce}', 0x10c8e), ('\u{10ccf}', 0x10c8f), - ('\u{10cd0}', 0x10c90), ('\u{10cd1}', 0x10c91), ('\u{10cd2}', 0x10c92), - ('\u{10cd3}', 0x10c93), ('\u{10cd4}', 0x10c94), ('\u{10cd5}', 0x10c95), - ('\u{10cd6}', 0x10c96), ('\u{10cd7}', 0x10c97), ('\u{10cd8}', 0x10c98), - ('\u{10cd9}', 0x10c99), ('\u{10cda}', 0x10c9a), ('\u{10cdb}', 0x10c9b), - ('\u{10cdc}', 0x10c9c), ('\u{10cdd}', 0x10c9d), ('\u{10cde}', 0x10c9e), - ('\u{10cdf}', 0x10c9f), ('\u{10ce0}', 0x10ca0), ('\u{10ce1}', 0x10ca1), - ('\u{10ce2}', 0x10ca2), ('\u{10ce3}', 0x10ca3), ('\u{10ce4}', 0x10ca4), - ('\u{10ce5}', 0x10ca5), ('\u{10ce6}', 0x10ca6), ('\u{10ce7}', 0x10ca7), - ('\u{10ce8}', 0x10ca8), ('\u{10ce9}', 0x10ca9), ('\u{10cea}', 0x10caa), - ('\u{10ceb}', 0x10cab), ('\u{10cec}', 0x10cac), ('\u{10ced}', 0x10cad), - ('\u{10cee}', 0x10cae), ('\u{10cef}', 0x10caf), ('\u{10cf0}', 0x10cb0), - ('\u{10cf1}', 0x10cb1), ('\u{10cf2}', 0x10cb2), ('\u{10d70}', 0x10d50), - ('\u{10d71}', 0x10d51), ('\u{10d72}', 0x10d52), ('\u{10d73}', 0x10d53), - ('\u{10d74}', 0x10d54), ('\u{10d75}', 0x10d55), ('\u{10d76}', 0x10d56), - ('\u{10d77}', 0x10d57), ('\u{10d78}', 0x10d58), ('\u{10d79}', 0x10d59), - ('\u{10d7a}', 0x10d5a), ('\u{10d7b}', 0x10d5b), ('\u{10d7c}', 0x10d5c), - ('\u{10d7d}', 0x10d5d), ('\u{10d7e}', 0x10d5e), ('\u{10d7f}', 0x10d5f), - ('\u{10d80}', 0x10d60), ('\u{10d81}', 0x10d61), ('\u{10d82}', 0x10d62), - ('\u{10d83}', 0x10d63), ('\u{10d84}', 0x10d64), ('\u{10d85}', 0x10d65), - ('\u{118c0}', 0x118a0), ('\u{118c1}', 0x118a1), ('\u{118c2}', 0x118a2), - ('\u{118c3}', 0x118a3), ('\u{118c4}', 0x118a4), ('\u{118c5}', 0x118a5), - ('\u{118c6}', 0x118a6), ('\u{118c7}', 0x118a7), ('\u{118c8}', 0x118a8), - ('\u{118c9}', 0x118a9), ('\u{118ca}', 
0x118aa), ('\u{118cb}', 0x118ab), - ('\u{118cc}', 0x118ac), ('\u{118cd}', 0x118ad), ('\u{118ce}', 0x118ae), - ('\u{118cf}', 0x118af), ('\u{118d0}', 0x118b0), ('\u{118d1}', 0x118b1), - ('\u{118d2}', 0x118b2), ('\u{118d3}', 0x118b3), ('\u{118d4}', 0x118b4), - ('\u{118d5}', 0x118b5), ('\u{118d6}', 0x118b6), ('\u{118d7}', 0x118b7), - ('\u{118d8}', 0x118b8), ('\u{118d9}', 0x118b9), ('\u{118da}', 0x118ba), - ('\u{118db}', 0x118bb), ('\u{118dc}', 0x118bc), ('\u{118dd}', 0x118bd), - ('\u{118de}', 0x118be), ('\u{118df}', 0x118bf), ('\u{16e60}', 0x16e40), - ('\u{16e61}', 0x16e41), ('\u{16e62}', 0x16e42), ('\u{16e63}', 0x16e43), - ('\u{16e64}', 0x16e44), ('\u{16e65}', 0x16e45), ('\u{16e66}', 0x16e46), - ('\u{16e67}', 0x16e47), ('\u{16e68}', 0x16e48), ('\u{16e69}', 0x16e49), - ('\u{16e6a}', 0x16e4a), ('\u{16e6b}', 0x16e4b), ('\u{16e6c}', 0x16e4c), - ('\u{16e6d}', 0x16e4d), ('\u{16e6e}', 0x16e4e), ('\u{16e6f}', 0x16e4f), - ('\u{16e70}', 0x16e50), ('\u{16e71}', 0x16e51), ('\u{16e72}', 0x16e52), - ('\u{16e73}', 0x16e53), ('\u{16e74}', 0x16e54), ('\u{16e75}', 0x16e55), - ('\u{16e76}', 0x16e56), ('\u{16e77}', 0x16e57), ('\u{16e78}', 0x16e58), - ('\u{16e79}', 0x16e59), ('\u{16e7a}', 0x16e5a), ('\u{16e7b}', 0x16e5b), - ('\u{16e7c}', 0x16e5c), ('\u{16e7d}', 0x16e5d), ('\u{16e7e}', 0x16e5e), - ('\u{16e7f}', 0x16e5f), ('\u{16ebb}', 0x16ea0), ('\u{16ebc}', 0x16ea1), - ('\u{16ebd}', 0x16ea2), ('\u{16ebe}', 0x16ea3), ('\u{16ebf}', 0x16ea4), - ('\u{16ec0}', 0x16ea5), ('\u{16ec1}', 0x16ea6), ('\u{16ec2}', 0x16ea7), - ('\u{16ec3}', 0x16ea8), ('\u{16ec4}', 0x16ea9), ('\u{16ec5}', 0x16eaa), - ('\u{16ec6}', 0x16eab), ('\u{16ec7}', 0x16eac), ('\u{16ec8}', 0x16ead), - ('\u{16ec9}', 0x16eae), ('\u{16eca}', 0x16eaf), ('\u{16ecb}', 0x16eb0), - ('\u{16ecc}', 0x16eb1), ('\u{16ecd}', 0x16eb2), ('\u{16ece}', 0x16eb3), - ('\u{16ecf}', 0x16eb4), ('\u{16ed0}', 0x16eb5), ('\u{16ed1}', 0x16eb6), - ('\u{16ed2}', 0x16eb7), ('\u{16ed3}', 0x16eb8), ('\u{1e922}', 0x1e900), - ('\u{1e923}', 0x1e901), ('\u{1e924}', 0x1e902), ('\u{1e925}', 0x1e903), - ('\u{1e926}', 0x1e904), ('\u{1e927}', 0x1e905), ('\u{1e928}', 0x1e906), - ('\u{1e929}', 0x1e907), ('\u{1e92a}', 0x1e908), ('\u{1e92b}', 0x1e909), - ('\u{1e92c}', 0x1e90a), ('\u{1e92d}', 0x1e90b), ('\u{1e92e}', 0x1e90c), - ('\u{1e92f}', 0x1e90d), ('\u{1e930}', 0x1e90e), ('\u{1e931}', 0x1e90f), - ('\u{1e932}', 0x1e910), ('\u{1e933}', 0x1e911), ('\u{1e934}', 0x1e912), - ('\u{1e935}', 0x1e913), ('\u{1e936}', 0x1e914), ('\u{1e937}', 0x1e915), - ('\u{1e938}', 0x1e916), ('\u{1e939}', 0x1e917), ('\u{1e93a}', 0x1e918), - ('\u{1e93b}', 0x1e919), ('\u{1e93c}', 0x1e91a), ('\u{1e93d}', 0x1e91b), - ('\u{1e93e}', 0x1e91c), ('\u{1e93f}', 0x1e91d), ('\u{1e940}', 0x1e91e), - ('\u{1e941}', 0x1e91f), ('\u{1e942}', 0x1e920), ('\u{1e943}', 0x1e921), + ('\u{b5}', 924), ('\u{df}', 4194304), ('\u{e0}', 192), ('\u{e1}', 193), ('\u{e2}', 194), + ('\u{e3}', 195), ('\u{e4}', 196), ('\u{e5}', 197), ('\u{e6}', 198), ('\u{e7}', 199), + ('\u{e8}', 200), ('\u{e9}', 201), ('\u{ea}', 202), ('\u{eb}', 203), ('\u{ec}', 204), + ('\u{ed}', 205), ('\u{ee}', 206), ('\u{ef}', 207), ('\u{f0}', 208), ('\u{f1}', 209), + ('\u{f2}', 210), ('\u{f3}', 211), ('\u{f4}', 212), ('\u{f5}', 213), ('\u{f6}', 214), + ('\u{f8}', 216), ('\u{f9}', 217), ('\u{fa}', 218), ('\u{fb}', 219), ('\u{fc}', 220), + ('\u{fd}', 221), ('\u{fe}', 222), ('\u{ff}', 376), ('\u{101}', 256), ('\u{103}', 258), + ('\u{105}', 260), ('\u{107}', 262), ('\u{109}', 264), ('\u{10b}', 266), ('\u{10d}', 268), + ('\u{10f}', 270), ('\u{111}', 272), ('\u{113}', 274), ('\u{115}', 276), 
('\u{117}', 278), + ('\u{119}', 280), ('\u{11b}', 282), ('\u{11d}', 284), ('\u{11f}', 286), ('\u{121}', 288), + ('\u{123}', 290), ('\u{125}', 292), ('\u{127}', 294), ('\u{129}', 296), ('\u{12b}', 298), + ('\u{12d}', 300), ('\u{12f}', 302), ('\u{131}', 73), ('\u{133}', 306), ('\u{135}', 308), + ('\u{137}', 310), ('\u{13a}', 313), ('\u{13c}', 315), ('\u{13e}', 317), ('\u{140}', 319), + ('\u{142}', 321), ('\u{144}', 323), ('\u{146}', 325), ('\u{148}', 327), + ('\u{149}', 4194305), ('\u{14b}', 330), ('\u{14d}', 332), ('\u{14f}', 334), + ('\u{151}', 336), ('\u{153}', 338), ('\u{155}', 340), ('\u{157}', 342), ('\u{159}', 344), + ('\u{15b}', 346), ('\u{15d}', 348), ('\u{15f}', 350), ('\u{161}', 352), ('\u{163}', 354), + ('\u{165}', 356), ('\u{167}', 358), ('\u{169}', 360), ('\u{16b}', 362), ('\u{16d}', 364), + ('\u{16f}', 366), ('\u{171}', 368), ('\u{173}', 370), ('\u{175}', 372), ('\u{177}', 374), + ('\u{17a}', 377), ('\u{17c}', 379), ('\u{17e}', 381), ('\u{17f}', 83), ('\u{180}', 579), + ('\u{183}', 386), ('\u{185}', 388), ('\u{188}', 391), ('\u{18c}', 395), ('\u{192}', 401), + ('\u{195}', 502), ('\u{199}', 408), ('\u{19a}', 573), ('\u{19b}', 42972), ('\u{19e}', 544), + ('\u{1a1}', 416), ('\u{1a3}', 418), ('\u{1a5}', 420), ('\u{1a8}', 423), ('\u{1ad}', 428), + ('\u{1b0}', 431), ('\u{1b4}', 435), ('\u{1b6}', 437), ('\u{1b9}', 440), ('\u{1bd}', 444), + ('\u{1bf}', 503), ('\u{1c5}', 452), ('\u{1c6}', 452), ('\u{1c8}', 455), ('\u{1c9}', 455), + ('\u{1cb}', 458), ('\u{1cc}', 458), ('\u{1ce}', 461), ('\u{1d0}', 463), ('\u{1d2}', 465), + ('\u{1d4}', 467), ('\u{1d6}', 469), ('\u{1d8}', 471), ('\u{1da}', 473), ('\u{1dc}', 475), + ('\u{1dd}', 398), ('\u{1df}', 478), ('\u{1e1}', 480), ('\u{1e3}', 482), ('\u{1e5}', 484), + ('\u{1e7}', 486), ('\u{1e9}', 488), ('\u{1eb}', 490), ('\u{1ed}', 492), ('\u{1ef}', 494), + ('\u{1f0}', 4194306), ('\u{1f2}', 497), ('\u{1f3}', 497), ('\u{1f5}', 500), + ('\u{1f9}', 504), ('\u{1fb}', 506), ('\u{1fd}', 508), ('\u{1ff}', 510), ('\u{201}', 512), + ('\u{203}', 514), ('\u{205}', 516), ('\u{207}', 518), ('\u{209}', 520), ('\u{20b}', 522), + ('\u{20d}', 524), ('\u{20f}', 526), ('\u{211}', 528), ('\u{213}', 530), ('\u{215}', 532), + ('\u{217}', 534), ('\u{219}', 536), ('\u{21b}', 538), ('\u{21d}', 540), ('\u{21f}', 542), + ('\u{223}', 546), ('\u{225}', 548), ('\u{227}', 550), ('\u{229}', 552), ('\u{22b}', 554), + ('\u{22d}', 556), ('\u{22f}', 558), ('\u{231}', 560), ('\u{233}', 562), ('\u{23c}', 571), + ('\u{23f}', 11390), ('\u{240}', 11391), ('\u{242}', 577), ('\u{247}', 582), + ('\u{249}', 584), ('\u{24b}', 586), ('\u{24d}', 588), ('\u{24f}', 590), ('\u{250}', 11375), + ('\u{251}', 11373), ('\u{252}', 11376), ('\u{253}', 385), ('\u{254}', 390), + ('\u{256}', 393), ('\u{257}', 394), ('\u{259}', 399), ('\u{25b}', 400), ('\u{25c}', 42923), + ('\u{260}', 403), ('\u{261}', 42924), ('\u{263}', 404), ('\u{264}', 42955), + ('\u{265}', 42893), ('\u{266}', 42922), ('\u{268}', 407), ('\u{269}', 406), + ('\u{26a}', 42926), ('\u{26b}', 11362), ('\u{26c}', 42925), ('\u{26f}', 412), + ('\u{271}', 11374), ('\u{272}', 413), ('\u{275}', 415), ('\u{27d}', 11364), + ('\u{280}', 422), ('\u{282}', 42949), ('\u{283}', 425), ('\u{287}', 42929), + ('\u{288}', 430), ('\u{289}', 580), ('\u{28a}', 433), ('\u{28b}', 434), ('\u{28c}', 581), + ('\u{292}', 439), ('\u{29d}', 42930), ('\u{29e}', 42928), ('\u{345}', 921), + ('\u{371}', 880), ('\u{373}', 882), ('\u{377}', 886), ('\u{37b}', 1021), ('\u{37c}', 1022), + ('\u{37d}', 1023), ('\u{390}', 4194307), ('\u{3ac}', 902), ('\u{3ad}', 904), + ('\u{3ae}', 905), 
('\u{3af}', 906), ('\u{3b0}', 4194308), ('\u{3b1}', 913), + ('\u{3b2}', 914), ('\u{3b3}', 915), ('\u{3b4}', 916), ('\u{3b5}', 917), ('\u{3b6}', 918), + ('\u{3b7}', 919), ('\u{3b8}', 920), ('\u{3b9}', 921), ('\u{3ba}', 922), ('\u{3bb}', 923), + ('\u{3bc}', 924), ('\u{3bd}', 925), ('\u{3be}', 926), ('\u{3bf}', 927), ('\u{3c0}', 928), + ('\u{3c1}', 929), ('\u{3c2}', 931), ('\u{3c3}', 931), ('\u{3c4}', 932), ('\u{3c5}', 933), + ('\u{3c6}', 934), ('\u{3c7}', 935), ('\u{3c8}', 936), ('\u{3c9}', 937), ('\u{3ca}', 938), + ('\u{3cb}', 939), ('\u{3cc}', 908), ('\u{3cd}', 910), ('\u{3ce}', 911), ('\u{3d0}', 914), + ('\u{3d1}', 920), ('\u{3d5}', 934), ('\u{3d6}', 928), ('\u{3d7}', 975), ('\u{3d9}', 984), + ('\u{3db}', 986), ('\u{3dd}', 988), ('\u{3df}', 990), ('\u{3e1}', 992), ('\u{3e3}', 994), + ('\u{3e5}', 996), ('\u{3e7}', 998), ('\u{3e9}', 1000), ('\u{3eb}', 1002), ('\u{3ed}', 1004), + ('\u{3ef}', 1006), ('\u{3f0}', 922), ('\u{3f1}', 929), ('\u{3f2}', 1017), ('\u{3f3}', 895), + ('\u{3f5}', 917), ('\u{3f8}', 1015), ('\u{3fb}', 1018), ('\u{430}', 1040), + ('\u{431}', 1041), ('\u{432}', 1042), ('\u{433}', 1043), ('\u{434}', 1044), + ('\u{435}', 1045), ('\u{436}', 1046), ('\u{437}', 1047), ('\u{438}', 1048), + ('\u{439}', 1049), ('\u{43a}', 1050), ('\u{43b}', 1051), ('\u{43c}', 1052), + ('\u{43d}', 1053), ('\u{43e}', 1054), ('\u{43f}', 1055), ('\u{440}', 1056), + ('\u{441}', 1057), ('\u{442}', 1058), ('\u{443}', 1059), ('\u{444}', 1060), + ('\u{445}', 1061), ('\u{446}', 1062), ('\u{447}', 1063), ('\u{448}', 1064), + ('\u{449}', 1065), ('\u{44a}', 1066), ('\u{44b}', 1067), ('\u{44c}', 1068), + ('\u{44d}', 1069), ('\u{44e}', 1070), ('\u{44f}', 1071), ('\u{450}', 1024), + ('\u{451}', 1025), ('\u{452}', 1026), ('\u{453}', 1027), ('\u{454}', 1028), + ('\u{455}', 1029), ('\u{456}', 1030), ('\u{457}', 1031), ('\u{458}', 1032), + ('\u{459}', 1033), ('\u{45a}', 1034), ('\u{45b}', 1035), ('\u{45c}', 1036), + ('\u{45d}', 1037), ('\u{45e}', 1038), ('\u{45f}', 1039), ('\u{461}', 1120), + ('\u{463}', 1122), ('\u{465}', 1124), ('\u{467}', 1126), ('\u{469}', 1128), + ('\u{46b}', 1130), ('\u{46d}', 1132), ('\u{46f}', 1134), ('\u{471}', 1136), + ('\u{473}', 1138), ('\u{475}', 1140), ('\u{477}', 1142), ('\u{479}', 1144), + ('\u{47b}', 1146), ('\u{47d}', 1148), ('\u{47f}', 1150), ('\u{481}', 1152), + ('\u{48b}', 1162), ('\u{48d}', 1164), ('\u{48f}', 1166), ('\u{491}', 1168), + ('\u{493}', 1170), ('\u{495}', 1172), ('\u{497}', 1174), ('\u{499}', 1176), + ('\u{49b}', 1178), ('\u{49d}', 1180), ('\u{49f}', 1182), ('\u{4a1}', 1184), + ('\u{4a3}', 1186), ('\u{4a5}', 1188), ('\u{4a7}', 1190), ('\u{4a9}', 1192), + ('\u{4ab}', 1194), ('\u{4ad}', 1196), ('\u{4af}', 1198), ('\u{4b1}', 1200), + ('\u{4b3}', 1202), ('\u{4b5}', 1204), ('\u{4b7}', 1206), ('\u{4b9}', 1208), + ('\u{4bb}', 1210), ('\u{4bd}', 1212), ('\u{4bf}', 1214), ('\u{4c2}', 1217), + ('\u{4c4}', 1219), ('\u{4c6}', 1221), ('\u{4c8}', 1223), ('\u{4ca}', 1225), + ('\u{4cc}', 1227), ('\u{4ce}', 1229), ('\u{4cf}', 1216), ('\u{4d1}', 1232), + ('\u{4d3}', 1234), ('\u{4d5}', 1236), ('\u{4d7}', 1238), ('\u{4d9}', 1240), + ('\u{4db}', 1242), ('\u{4dd}', 1244), ('\u{4df}', 1246), ('\u{4e1}', 1248), + ('\u{4e3}', 1250), ('\u{4e5}', 1252), ('\u{4e7}', 1254), ('\u{4e9}', 1256), + ('\u{4eb}', 1258), ('\u{4ed}', 1260), ('\u{4ef}', 1262), ('\u{4f1}', 1264), + ('\u{4f3}', 1266), ('\u{4f5}', 1268), ('\u{4f7}', 1270), ('\u{4f9}', 1272), + ('\u{4fb}', 1274), ('\u{4fd}', 1276), ('\u{4ff}', 1278), ('\u{501}', 1280), + ('\u{503}', 1282), ('\u{505}', 1284), ('\u{507}', 1286), ('\u{509}', 
1288), + ('\u{50b}', 1290), ('\u{50d}', 1292), ('\u{50f}', 1294), ('\u{511}', 1296), + ('\u{513}', 1298), ('\u{515}', 1300), ('\u{517}', 1302), ('\u{519}', 1304), + ('\u{51b}', 1306), ('\u{51d}', 1308), ('\u{51f}', 1310), ('\u{521}', 1312), + ('\u{523}', 1314), ('\u{525}', 1316), ('\u{527}', 1318), ('\u{529}', 1320), + ('\u{52b}', 1322), ('\u{52d}', 1324), ('\u{52f}', 1326), ('\u{561}', 1329), + ('\u{562}', 1330), ('\u{563}', 1331), ('\u{564}', 1332), ('\u{565}', 1333), + ('\u{566}', 1334), ('\u{567}', 1335), ('\u{568}', 1336), ('\u{569}', 1337), + ('\u{56a}', 1338), ('\u{56b}', 1339), ('\u{56c}', 1340), ('\u{56d}', 1341), + ('\u{56e}', 1342), ('\u{56f}', 1343), ('\u{570}', 1344), ('\u{571}', 1345), + ('\u{572}', 1346), ('\u{573}', 1347), ('\u{574}', 1348), ('\u{575}', 1349), + ('\u{576}', 1350), ('\u{577}', 1351), ('\u{578}', 1352), ('\u{579}', 1353), + ('\u{57a}', 1354), ('\u{57b}', 1355), ('\u{57c}', 1356), ('\u{57d}', 1357), + ('\u{57e}', 1358), ('\u{57f}', 1359), ('\u{580}', 1360), ('\u{581}', 1361), + ('\u{582}', 1362), ('\u{583}', 1363), ('\u{584}', 1364), ('\u{585}', 1365), + ('\u{586}', 1366), ('\u{587}', 4194309), ('\u{10d0}', 7312), ('\u{10d1}', 7313), + ('\u{10d2}', 7314), ('\u{10d3}', 7315), ('\u{10d4}', 7316), ('\u{10d5}', 7317), + ('\u{10d6}', 7318), ('\u{10d7}', 7319), ('\u{10d8}', 7320), ('\u{10d9}', 7321), + ('\u{10da}', 7322), ('\u{10db}', 7323), ('\u{10dc}', 7324), ('\u{10dd}', 7325), + ('\u{10de}', 7326), ('\u{10df}', 7327), ('\u{10e0}', 7328), ('\u{10e1}', 7329), + ('\u{10e2}', 7330), ('\u{10e3}', 7331), ('\u{10e4}', 7332), ('\u{10e5}', 7333), + ('\u{10e6}', 7334), ('\u{10e7}', 7335), ('\u{10e8}', 7336), ('\u{10e9}', 7337), + ('\u{10ea}', 7338), ('\u{10eb}', 7339), ('\u{10ec}', 7340), ('\u{10ed}', 7341), + ('\u{10ee}', 7342), ('\u{10ef}', 7343), ('\u{10f0}', 7344), ('\u{10f1}', 7345), + ('\u{10f2}', 7346), ('\u{10f3}', 7347), ('\u{10f4}', 7348), ('\u{10f5}', 7349), + ('\u{10f6}', 7350), ('\u{10f7}', 7351), ('\u{10f8}', 7352), ('\u{10f9}', 7353), + ('\u{10fa}', 7354), ('\u{10fd}', 7357), ('\u{10fe}', 7358), ('\u{10ff}', 7359), + ('\u{13f8}', 5104), ('\u{13f9}', 5105), ('\u{13fa}', 5106), ('\u{13fb}', 5107), + ('\u{13fc}', 5108), ('\u{13fd}', 5109), ('\u{1c80}', 1042), ('\u{1c81}', 1044), + ('\u{1c82}', 1054), ('\u{1c83}', 1057), ('\u{1c84}', 1058), ('\u{1c85}', 1058), + ('\u{1c86}', 1066), ('\u{1c87}', 1122), ('\u{1c88}', 42570), ('\u{1c8a}', 7305), + ('\u{1d79}', 42877), ('\u{1d7d}', 11363), ('\u{1d8e}', 42950), ('\u{1e01}', 7680), + ('\u{1e03}', 7682), ('\u{1e05}', 7684), ('\u{1e07}', 7686), ('\u{1e09}', 7688), + ('\u{1e0b}', 7690), ('\u{1e0d}', 7692), ('\u{1e0f}', 7694), ('\u{1e11}', 7696), + ('\u{1e13}', 7698), ('\u{1e15}', 7700), ('\u{1e17}', 7702), ('\u{1e19}', 7704), + ('\u{1e1b}', 7706), ('\u{1e1d}', 7708), ('\u{1e1f}', 7710), ('\u{1e21}', 7712), + ('\u{1e23}', 7714), ('\u{1e25}', 7716), ('\u{1e27}', 7718), ('\u{1e29}', 7720), + ('\u{1e2b}', 7722), ('\u{1e2d}', 7724), ('\u{1e2f}', 7726), ('\u{1e31}', 7728), + ('\u{1e33}', 7730), ('\u{1e35}', 7732), ('\u{1e37}', 7734), ('\u{1e39}', 7736), + ('\u{1e3b}', 7738), ('\u{1e3d}', 7740), ('\u{1e3f}', 7742), ('\u{1e41}', 7744), + ('\u{1e43}', 7746), ('\u{1e45}', 7748), ('\u{1e47}', 7750), ('\u{1e49}', 7752), + ('\u{1e4b}', 7754), ('\u{1e4d}', 7756), ('\u{1e4f}', 7758), ('\u{1e51}', 7760), + ('\u{1e53}', 7762), ('\u{1e55}', 7764), ('\u{1e57}', 7766), ('\u{1e59}', 7768), + ('\u{1e5b}', 7770), ('\u{1e5d}', 7772), ('\u{1e5f}', 7774), ('\u{1e61}', 7776), + ('\u{1e63}', 7778), ('\u{1e65}', 7780), ('\u{1e67}', 7782), 
('\u{1e69}', 7784), + ('\u{1e6b}', 7786), ('\u{1e6d}', 7788), ('\u{1e6f}', 7790), ('\u{1e71}', 7792), + ('\u{1e73}', 7794), ('\u{1e75}', 7796), ('\u{1e77}', 7798), ('\u{1e79}', 7800), + ('\u{1e7b}', 7802), ('\u{1e7d}', 7804), ('\u{1e7f}', 7806), ('\u{1e81}', 7808), + ('\u{1e83}', 7810), ('\u{1e85}', 7812), ('\u{1e87}', 7814), ('\u{1e89}', 7816), + ('\u{1e8b}', 7818), ('\u{1e8d}', 7820), ('\u{1e8f}', 7822), ('\u{1e91}', 7824), + ('\u{1e93}', 7826), ('\u{1e95}', 7828), ('\u{1e96}', 4194310), ('\u{1e97}', 4194311), + ('\u{1e98}', 4194312), ('\u{1e99}', 4194313), ('\u{1e9a}', 4194314), ('\u{1e9b}', 7776), + ('\u{1ea1}', 7840), ('\u{1ea3}', 7842), ('\u{1ea5}', 7844), ('\u{1ea7}', 7846), + ('\u{1ea9}', 7848), ('\u{1eab}', 7850), ('\u{1ead}', 7852), ('\u{1eaf}', 7854), + ('\u{1eb1}', 7856), ('\u{1eb3}', 7858), ('\u{1eb5}', 7860), ('\u{1eb7}', 7862), + ('\u{1eb9}', 7864), ('\u{1ebb}', 7866), ('\u{1ebd}', 7868), ('\u{1ebf}', 7870), + ('\u{1ec1}', 7872), ('\u{1ec3}', 7874), ('\u{1ec5}', 7876), ('\u{1ec7}', 7878), + ('\u{1ec9}', 7880), ('\u{1ecb}', 7882), ('\u{1ecd}', 7884), ('\u{1ecf}', 7886), + ('\u{1ed1}', 7888), ('\u{1ed3}', 7890), ('\u{1ed5}', 7892), ('\u{1ed7}', 7894), + ('\u{1ed9}', 7896), ('\u{1edb}', 7898), ('\u{1edd}', 7900), ('\u{1edf}', 7902), + ('\u{1ee1}', 7904), ('\u{1ee3}', 7906), ('\u{1ee5}', 7908), ('\u{1ee7}', 7910), + ('\u{1ee9}', 7912), ('\u{1eeb}', 7914), ('\u{1eed}', 7916), ('\u{1eef}', 7918), + ('\u{1ef1}', 7920), ('\u{1ef3}', 7922), ('\u{1ef5}', 7924), ('\u{1ef7}', 7926), + ('\u{1ef9}', 7928), ('\u{1efb}', 7930), ('\u{1efd}', 7932), ('\u{1eff}', 7934), + ('\u{1f00}', 7944), ('\u{1f01}', 7945), ('\u{1f02}', 7946), ('\u{1f03}', 7947), + ('\u{1f04}', 7948), ('\u{1f05}', 7949), ('\u{1f06}', 7950), ('\u{1f07}', 7951), + ('\u{1f10}', 7960), ('\u{1f11}', 7961), ('\u{1f12}', 7962), ('\u{1f13}', 7963), + ('\u{1f14}', 7964), ('\u{1f15}', 7965), ('\u{1f20}', 7976), ('\u{1f21}', 7977), + ('\u{1f22}', 7978), ('\u{1f23}', 7979), ('\u{1f24}', 7980), ('\u{1f25}', 7981), + ('\u{1f26}', 7982), ('\u{1f27}', 7983), ('\u{1f30}', 7992), ('\u{1f31}', 7993), + ('\u{1f32}', 7994), ('\u{1f33}', 7995), ('\u{1f34}', 7996), ('\u{1f35}', 7997), + ('\u{1f36}', 7998), ('\u{1f37}', 7999), ('\u{1f40}', 8008), ('\u{1f41}', 8009), + ('\u{1f42}', 8010), ('\u{1f43}', 8011), ('\u{1f44}', 8012), ('\u{1f45}', 8013), + ('\u{1f50}', 4194315), ('\u{1f51}', 8025), ('\u{1f52}', 4194316), ('\u{1f53}', 8027), + ('\u{1f54}', 4194317), ('\u{1f55}', 8029), ('\u{1f56}', 4194318), ('\u{1f57}', 8031), + ('\u{1f60}', 8040), ('\u{1f61}', 8041), ('\u{1f62}', 8042), ('\u{1f63}', 8043), + ('\u{1f64}', 8044), ('\u{1f65}', 8045), ('\u{1f66}', 8046), ('\u{1f67}', 8047), + ('\u{1f70}', 8122), ('\u{1f71}', 8123), ('\u{1f72}', 8136), ('\u{1f73}', 8137), + ('\u{1f74}', 8138), ('\u{1f75}', 8139), ('\u{1f76}', 8154), ('\u{1f77}', 8155), + ('\u{1f78}', 8184), ('\u{1f79}', 8185), ('\u{1f7a}', 8170), ('\u{1f7b}', 8171), + ('\u{1f7c}', 8186), ('\u{1f7d}', 8187), ('\u{1f80}', 4194319), ('\u{1f81}', 4194320), + ('\u{1f82}', 4194321), ('\u{1f83}', 4194322), ('\u{1f84}', 4194323), ('\u{1f85}', 4194324), + ('\u{1f86}', 4194325), ('\u{1f87}', 4194326), ('\u{1f88}', 4194327), ('\u{1f89}', 4194328), + ('\u{1f8a}', 4194329), ('\u{1f8b}', 4194330), ('\u{1f8c}', 4194331), ('\u{1f8d}', 4194332), + ('\u{1f8e}', 4194333), ('\u{1f8f}', 4194334), ('\u{1f90}', 4194335), ('\u{1f91}', 4194336), + ('\u{1f92}', 4194337), ('\u{1f93}', 4194338), ('\u{1f94}', 4194339), ('\u{1f95}', 4194340), + ('\u{1f96}', 4194341), ('\u{1f97}', 4194342), ('\u{1f98}', 4194343), 
('\u{1f99}', 4194344), + ('\u{1f9a}', 4194345), ('\u{1f9b}', 4194346), ('\u{1f9c}', 4194347), ('\u{1f9d}', 4194348), + ('\u{1f9e}', 4194349), ('\u{1f9f}', 4194350), ('\u{1fa0}', 4194351), ('\u{1fa1}', 4194352), + ('\u{1fa2}', 4194353), ('\u{1fa3}', 4194354), ('\u{1fa4}', 4194355), ('\u{1fa5}', 4194356), + ('\u{1fa6}', 4194357), ('\u{1fa7}', 4194358), ('\u{1fa8}', 4194359), ('\u{1fa9}', 4194360), + ('\u{1faa}', 4194361), ('\u{1fab}', 4194362), ('\u{1fac}', 4194363), ('\u{1fad}', 4194364), + ('\u{1fae}', 4194365), ('\u{1faf}', 4194366), ('\u{1fb0}', 8120), ('\u{1fb1}', 8121), + ('\u{1fb2}', 4194367), ('\u{1fb3}', 4194368), ('\u{1fb4}', 4194369), ('\u{1fb6}', 4194370), + ('\u{1fb7}', 4194371), ('\u{1fbc}', 4194372), ('\u{1fbe}', 921), ('\u{1fc2}', 4194373), + ('\u{1fc3}', 4194374), ('\u{1fc4}', 4194375), ('\u{1fc6}', 4194376), ('\u{1fc7}', 4194377), + ('\u{1fcc}', 4194378), ('\u{1fd0}', 8152), ('\u{1fd1}', 8153), ('\u{1fd2}', 4194379), + ('\u{1fd3}', 4194380), ('\u{1fd6}', 4194381), ('\u{1fd7}', 4194382), ('\u{1fe0}', 8168), + ('\u{1fe1}', 8169), ('\u{1fe2}', 4194383), ('\u{1fe3}', 4194384), ('\u{1fe4}', 4194385), + ('\u{1fe5}', 8172), ('\u{1fe6}', 4194386), ('\u{1fe7}', 4194387), ('\u{1ff2}', 4194388), + ('\u{1ff3}', 4194389), ('\u{1ff4}', 4194390), ('\u{1ff6}', 4194391), ('\u{1ff7}', 4194392), + ('\u{1ffc}', 4194393), ('\u{214e}', 8498), ('\u{2170}', 8544), ('\u{2171}', 8545), + ('\u{2172}', 8546), ('\u{2173}', 8547), ('\u{2174}', 8548), ('\u{2175}', 8549), + ('\u{2176}', 8550), ('\u{2177}', 8551), ('\u{2178}', 8552), ('\u{2179}', 8553), + ('\u{217a}', 8554), ('\u{217b}', 8555), ('\u{217c}', 8556), ('\u{217d}', 8557), + ('\u{217e}', 8558), ('\u{217f}', 8559), ('\u{2184}', 8579), ('\u{24d0}', 9398), + ('\u{24d1}', 9399), ('\u{24d2}', 9400), ('\u{24d3}', 9401), ('\u{24d4}', 9402), + ('\u{24d5}', 9403), ('\u{24d6}', 9404), ('\u{24d7}', 9405), ('\u{24d8}', 9406), + ('\u{24d9}', 9407), ('\u{24da}', 9408), ('\u{24db}', 9409), ('\u{24dc}', 9410), + ('\u{24dd}', 9411), ('\u{24de}', 9412), ('\u{24df}', 9413), ('\u{24e0}', 9414), + ('\u{24e1}', 9415), ('\u{24e2}', 9416), ('\u{24e3}', 9417), ('\u{24e4}', 9418), + ('\u{24e5}', 9419), ('\u{24e6}', 9420), ('\u{24e7}', 9421), ('\u{24e8}', 9422), + ('\u{24e9}', 9423), ('\u{2c30}', 11264), ('\u{2c31}', 11265), ('\u{2c32}', 11266), + ('\u{2c33}', 11267), ('\u{2c34}', 11268), ('\u{2c35}', 11269), ('\u{2c36}', 11270), + ('\u{2c37}', 11271), ('\u{2c38}', 11272), ('\u{2c39}', 11273), ('\u{2c3a}', 11274), + ('\u{2c3b}', 11275), ('\u{2c3c}', 11276), ('\u{2c3d}', 11277), ('\u{2c3e}', 11278), + ('\u{2c3f}', 11279), ('\u{2c40}', 11280), ('\u{2c41}', 11281), ('\u{2c42}', 11282), + ('\u{2c43}', 11283), ('\u{2c44}', 11284), ('\u{2c45}', 11285), ('\u{2c46}', 11286), + ('\u{2c47}', 11287), ('\u{2c48}', 11288), ('\u{2c49}', 11289), ('\u{2c4a}', 11290), + ('\u{2c4b}', 11291), ('\u{2c4c}', 11292), ('\u{2c4d}', 11293), ('\u{2c4e}', 11294), + ('\u{2c4f}', 11295), ('\u{2c50}', 11296), ('\u{2c51}', 11297), ('\u{2c52}', 11298), + ('\u{2c53}', 11299), ('\u{2c54}', 11300), ('\u{2c55}', 11301), ('\u{2c56}', 11302), + ('\u{2c57}', 11303), ('\u{2c58}', 11304), ('\u{2c59}', 11305), ('\u{2c5a}', 11306), + ('\u{2c5b}', 11307), ('\u{2c5c}', 11308), ('\u{2c5d}', 11309), ('\u{2c5e}', 11310), + ('\u{2c5f}', 11311), ('\u{2c61}', 11360), ('\u{2c65}', 570), ('\u{2c66}', 574), + ('\u{2c68}', 11367), ('\u{2c6a}', 11369), ('\u{2c6c}', 11371), ('\u{2c73}', 11378), + ('\u{2c76}', 11381), ('\u{2c81}', 11392), ('\u{2c83}', 11394), ('\u{2c85}', 11396), + ('\u{2c87}', 11398), ('\u{2c89}', 11400), 
('\u{2c8b}', 11402), ('\u{2c8d}', 11404), + ('\u{2c8f}', 11406), ('\u{2c91}', 11408), ('\u{2c93}', 11410), ('\u{2c95}', 11412), + ('\u{2c97}', 11414), ('\u{2c99}', 11416), ('\u{2c9b}', 11418), ('\u{2c9d}', 11420), + ('\u{2c9f}', 11422), ('\u{2ca1}', 11424), ('\u{2ca3}', 11426), ('\u{2ca5}', 11428), + ('\u{2ca7}', 11430), ('\u{2ca9}', 11432), ('\u{2cab}', 11434), ('\u{2cad}', 11436), + ('\u{2caf}', 11438), ('\u{2cb1}', 11440), ('\u{2cb3}', 11442), ('\u{2cb5}', 11444), + ('\u{2cb7}', 11446), ('\u{2cb9}', 11448), ('\u{2cbb}', 11450), ('\u{2cbd}', 11452), + ('\u{2cbf}', 11454), ('\u{2cc1}', 11456), ('\u{2cc3}', 11458), ('\u{2cc5}', 11460), + ('\u{2cc7}', 11462), ('\u{2cc9}', 11464), ('\u{2ccb}', 11466), ('\u{2ccd}', 11468), + ('\u{2ccf}', 11470), ('\u{2cd1}', 11472), ('\u{2cd3}', 11474), ('\u{2cd5}', 11476), + ('\u{2cd7}', 11478), ('\u{2cd9}', 11480), ('\u{2cdb}', 11482), ('\u{2cdd}', 11484), + ('\u{2cdf}', 11486), ('\u{2ce1}', 11488), ('\u{2ce3}', 11490), ('\u{2cec}', 11499), + ('\u{2cee}', 11501), ('\u{2cf3}', 11506), ('\u{2d00}', 4256), ('\u{2d01}', 4257), + ('\u{2d02}', 4258), ('\u{2d03}', 4259), ('\u{2d04}', 4260), ('\u{2d05}', 4261), + ('\u{2d06}', 4262), ('\u{2d07}', 4263), ('\u{2d08}', 4264), ('\u{2d09}', 4265), + ('\u{2d0a}', 4266), ('\u{2d0b}', 4267), ('\u{2d0c}', 4268), ('\u{2d0d}', 4269), + ('\u{2d0e}', 4270), ('\u{2d0f}', 4271), ('\u{2d10}', 4272), ('\u{2d11}', 4273), + ('\u{2d12}', 4274), ('\u{2d13}', 4275), ('\u{2d14}', 4276), ('\u{2d15}', 4277), + ('\u{2d16}', 4278), ('\u{2d17}', 4279), ('\u{2d18}', 4280), ('\u{2d19}', 4281), + ('\u{2d1a}', 4282), ('\u{2d1b}', 4283), ('\u{2d1c}', 4284), ('\u{2d1d}', 4285), + ('\u{2d1e}', 4286), ('\u{2d1f}', 4287), ('\u{2d20}', 4288), ('\u{2d21}', 4289), + ('\u{2d22}', 4290), ('\u{2d23}', 4291), ('\u{2d24}', 4292), ('\u{2d25}', 4293), + ('\u{2d27}', 4295), ('\u{2d2d}', 4301), ('\u{a641}', 42560), ('\u{a643}', 42562), + ('\u{a645}', 42564), ('\u{a647}', 42566), ('\u{a649}', 42568), ('\u{a64b}', 42570), + ('\u{a64d}', 42572), ('\u{a64f}', 42574), ('\u{a651}', 42576), ('\u{a653}', 42578), + ('\u{a655}', 42580), ('\u{a657}', 42582), ('\u{a659}', 42584), ('\u{a65b}', 42586), + ('\u{a65d}', 42588), ('\u{a65f}', 42590), ('\u{a661}', 42592), ('\u{a663}', 42594), + ('\u{a665}', 42596), ('\u{a667}', 42598), ('\u{a669}', 42600), ('\u{a66b}', 42602), + ('\u{a66d}', 42604), ('\u{a681}', 42624), ('\u{a683}', 42626), ('\u{a685}', 42628), + ('\u{a687}', 42630), ('\u{a689}', 42632), ('\u{a68b}', 42634), ('\u{a68d}', 42636), + ('\u{a68f}', 42638), ('\u{a691}', 42640), ('\u{a693}', 42642), ('\u{a695}', 42644), + ('\u{a697}', 42646), ('\u{a699}', 42648), ('\u{a69b}', 42650), ('\u{a723}', 42786), + ('\u{a725}', 42788), ('\u{a727}', 42790), ('\u{a729}', 42792), ('\u{a72b}', 42794), + ('\u{a72d}', 42796), ('\u{a72f}', 42798), ('\u{a733}', 42802), ('\u{a735}', 42804), + ('\u{a737}', 42806), ('\u{a739}', 42808), ('\u{a73b}', 42810), ('\u{a73d}', 42812), + ('\u{a73f}', 42814), ('\u{a741}', 42816), ('\u{a743}', 42818), ('\u{a745}', 42820), + ('\u{a747}', 42822), ('\u{a749}', 42824), ('\u{a74b}', 42826), ('\u{a74d}', 42828), + ('\u{a74f}', 42830), ('\u{a751}', 42832), ('\u{a753}', 42834), ('\u{a755}', 42836), + ('\u{a757}', 42838), ('\u{a759}', 42840), ('\u{a75b}', 42842), ('\u{a75d}', 42844), + ('\u{a75f}', 42846), ('\u{a761}', 42848), ('\u{a763}', 42850), ('\u{a765}', 42852), + ('\u{a767}', 42854), ('\u{a769}', 42856), ('\u{a76b}', 42858), ('\u{a76d}', 42860), + ('\u{a76f}', 42862), ('\u{a77a}', 42873), ('\u{a77c}', 42875), ('\u{a77f}', 42878), + ('\u{a781}', 42880), 
('\u{a783}', 42882), ('\u{a785}', 42884), ('\u{a787}', 42886), + ('\u{a78c}', 42891), ('\u{a791}', 42896), ('\u{a793}', 42898), ('\u{a794}', 42948), + ('\u{a797}', 42902), ('\u{a799}', 42904), ('\u{a79b}', 42906), ('\u{a79d}', 42908), + ('\u{a79f}', 42910), ('\u{a7a1}', 42912), ('\u{a7a3}', 42914), ('\u{a7a5}', 42916), + ('\u{a7a7}', 42918), ('\u{a7a9}', 42920), ('\u{a7b5}', 42932), ('\u{a7b7}', 42934), + ('\u{a7b9}', 42936), ('\u{a7bb}', 42938), ('\u{a7bd}', 42940), ('\u{a7bf}', 42942), + ('\u{a7c1}', 42944), ('\u{a7c3}', 42946), ('\u{a7c8}', 42951), ('\u{a7ca}', 42953), + ('\u{a7cd}', 42956), ('\u{a7cf}', 42958), ('\u{a7d1}', 42960), ('\u{a7d3}', 42962), + ('\u{a7d5}', 42964), ('\u{a7d7}', 42966), ('\u{a7d9}', 42968), ('\u{a7db}', 42970), + ('\u{a7f6}', 42997), ('\u{ab53}', 42931), ('\u{ab70}', 5024), ('\u{ab71}', 5025), + ('\u{ab72}', 5026), ('\u{ab73}', 5027), ('\u{ab74}', 5028), ('\u{ab75}', 5029), + ('\u{ab76}', 5030), ('\u{ab77}', 5031), ('\u{ab78}', 5032), ('\u{ab79}', 5033), + ('\u{ab7a}', 5034), ('\u{ab7b}', 5035), ('\u{ab7c}', 5036), ('\u{ab7d}', 5037), + ('\u{ab7e}', 5038), ('\u{ab7f}', 5039), ('\u{ab80}', 5040), ('\u{ab81}', 5041), + ('\u{ab82}', 5042), ('\u{ab83}', 5043), ('\u{ab84}', 5044), ('\u{ab85}', 5045), + ('\u{ab86}', 5046), ('\u{ab87}', 5047), ('\u{ab88}', 5048), ('\u{ab89}', 5049), + ('\u{ab8a}', 5050), ('\u{ab8b}', 5051), ('\u{ab8c}', 5052), ('\u{ab8d}', 5053), + ('\u{ab8e}', 5054), ('\u{ab8f}', 5055), ('\u{ab90}', 5056), ('\u{ab91}', 5057), + ('\u{ab92}', 5058), ('\u{ab93}', 5059), ('\u{ab94}', 5060), ('\u{ab95}', 5061), + ('\u{ab96}', 5062), ('\u{ab97}', 5063), ('\u{ab98}', 5064), ('\u{ab99}', 5065), + ('\u{ab9a}', 5066), ('\u{ab9b}', 5067), ('\u{ab9c}', 5068), ('\u{ab9d}', 5069), + ('\u{ab9e}', 5070), ('\u{ab9f}', 5071), ('\u{aba0}', 5072), ('\u{aba1}', 5073), + ('\u{aba2}', 5074), ('\u{aba3}', 5075), ('\u{aba4}', 5076), ('\u{aba5}', 5077), + ('\u{aba6}', 5078), ('\u{aba7}', 5079), ('\u{aba8}', 5080), ('\u{aba9}', 5081), + ('\u{abaa}', 5082), ('\u{abab}', 5083), ('\u{abac}', 5084), ('\u{abad}', 5085), + ('\u{abae}', 5086), ('\u{abaf}', 5087), ('\u{abb0}', 5088), ('\u{abb1}', 5089), + ('\u{abb2}', 5090), ('\u{abb3}', 5091), ('\u{abb4}', 5092), ('\u{abb5}', 5093), + ('\u{abb6}', 5094), ('\u{abb7}', 5095), ('\u{abb8}', 5096), ('\u{abb9}', 5097), + ('\u{abba}', 5098), ('\u{abbb}', 5099), ('\u{abbc}', 5100), ('\u{abbd}', 5101), + ('\u{abbe}', 5102), ('\u{abbf}', 5103), ('\u{fb00}', 4194394), ('\u{fb01}', 4194395), + ('\u{fb02}', 4194396), ('\u{fb03}', 4194397), ('\u{fb04}', 4194398), ('\u{fb05}', 4194399), + ('\u{fb06}', 4194400), ('\u{fb13}', 4194401), ('\u{fb14}', 4194402), ('\u{fb15}', 4194403), + ('\u{fb16}', 4194404), ('\u{fb17}', 4194405), ('\u{ff41}', 65313), ('\u{ff42}', 65314), + ('\u{ff43}', 65315), ('\u{ff44}', 65316), ('\u{ff45}', 65317), ('\u{ff46}', 65318), + ('\u{ff47}', 65319), ('\u{ff48}', 65320), ('\u{ff49}', 65321), ('\u{ff4a}', 65322), + ('\u{ff4b}', 65323), ('\u{ff4c}', 65324), ('\u{ff4d}', 65325), ('\u{ff4e}', 65326), + ('\u{ff4f}', 65327), ('\u{ff50}', 65328), ('\u{ff51}', 65329), ('\u{ff52}', 65330), + ('\u{ff53}', 65331), ('\u{ff54}', 65332), ('\u{ff55}', 65333), ('\u{ff56}', 65334), + ('\u{ff57}', 65335), ('\u{ff58}', 65336), ('\u{ff59}', 65337), ('\u{ff5a}', 65338), + ('\u{10428}', 66560), ('\u{10429}', 66561), ('\u{1042a}', 66562), ('\u{1042b}', 66563), + ('\u{1042c}', 66564), ('\u{1042d}', 66565), ('\u{1042e}', 66566), ('\u{1042f}', 66567), + ('\u{10430}', 66568), ('\u{10431}', 66569), ('\u{10432}', 66570), ('\u{10433}', 66571), + 
('\u{10434}', 66572), ('\u{10435}', 66573), ('\u{10436}', 66574), ('\u{10437}', 66575), + ('\u{10438}', 66576), ('\u{10439}', 66577), ('\u{1043a}', 66578), ('\u{1043b}', 66579), + ('\u{1043c}', 66580), ('\u{1043d}', 66581), ('\u{1043e}', 66582), ('\u{1043f}', 66583), + ('\u{10440}', 66584), ('\u{10441}', 66585), ('\u{10442}', 66586), ('\u{10443}', 66587), + ('\u{10444}', 66588), ('\u{10445}', 66589), ('\u{10446}', 66590), ('\u{10447}', 66591), + ('\u{10448}', 66592), ('\u{10449}', 66593), ('\u{1044a}', 66594), ('\u{1044b}', 66595), + ('\u{1044c}', 66596), ('\u{1044d}', 66597), ('\u{1044e}', 66598), ('\u{1044f}', 66599), + ('\u{104d8}', 66736), ('\u{104d9}', 66737), ('\u{104da}', 66738), ('\u{104db}', 66739), + ('\u{104dc}', 66740), ('\u{104dd}', 66741), ('\u{104de}', 66742), ('\u{104df}', 66743), + ('\u{104e0}', 66744), ('\u{104e1}', 66745), ('\u{104e2}', 66746), ('\u{104e3}', 66747), + ('\u{104e4}', 66748), ('\u{104e5}', 66749), ('\u{104e6}', 66750), ('\u{104e7}', 66751), + ('\u{104e8}', 66752), ('\u{104e9}', 66753), ('\u{104ea}', 66754), ('\u{104eb}', 66755), + ('\u{104ec}', 66756), ('\u{104ed}', 66757), ('\u{104ee}', 66758), ('\u{104ef}', 66759), + ('\u{104f0}', 66760), ('\u{104f1}', 66761), ('\u{104f2}', 66762), ('\u{104f3}', 66763), + ('\u{104f4}', 66764), ('\u{104f5}', 66765), ('\u{104f6}', 66766), ('\u{104f7}', 66767), + ('\u{104f8}', 66768), ('\u{104f9}', 66769), ('\u{104fa}', 66770), ('\u{104fb}', 66771), + ('\u{10597}', 66928), ('\u{10598}', 66929), ('\u{10599}', 66930), ('\u{1059a}', 66931), + ('\u{1059b}', 66932), ('\u{1059c}', 66933), ('\u{1059d}', 66934), ('\u{1059e}', 66935), + ('\u{1059f}', 66936), ('\u{105a0}', 66937), ('\u{105a1}', 66938), ('\u{105a3}', 66940), + ('\u{105a4}', 66941), ('\u{105a5}', 66942), ('\u{105a6}', 66943), ('\u{105a7}', 66944), + ('\u{105a8}', 66945), ('\u{105a9}', 66946), ('\u{105aa}', 66947), ('\u{105ab}', 66948), + ('\u{105ac}', 66949), ('\u{105ad}', 66950), ('\u{105ae}', 66951), ('\u{105af}', 66952), + ('\u{105b0}', 66953), ('\u{105b1}', 66954), ('\u{105b3}', 66956), ('\u{105b4}', 66957), + ('\u{105b5}', 66958), ('\u{105b6}', 66959), ('\u{105b7}', 66960), ('\u{105b8}', 66961), + ('\u{105b9}', 66962), ('\u{105bb}', 66964), ('\u{105bc}', 66965), ('\u{10cc0}', 68736), + ('\u{10cc1}', 68737), ('\u{10cc2}', 68738), ('\u{10cc3}', 68739), ('\u{10cc4}', 68740), + ('\u{10cc5}', 68741), ('\u{10cc6}', 68742), ('\u{10cc7}', 68743), ('\u{10cc8}', 68744), + ('\u{10cc9}', 68745), ('\u{10cca}', 68746), ('\u{10ccb}', 68747), ('\u{10ccc}', 68748), + ('\u{10ccd}', 68749), ('\u{10cce}', 68750), ('\u{10ccf}', 68751), ('\u{10cd0}', 68752), + ('\u{10cd1}', 68753), ('\u{10cd2}', 68754), ('\u{10cd3}', 68755), ('\u{10cd4}', 68756), + ('\u{10cd5}', 68757), ('\u{10cd6}', 68758), ('\u{10cd7}', 68759), ('\u{10cd8}', 68760), + ('\u{10cd9}', 68761), ('\u{10cda}', 68762), ('\u{10cdb}', 68763), ('\u{10cdc}', 68764), + ('\u{10cdd}', 68765), ('\u{10cde}', 68766), ('\u{10cdf}', 68767), ('\u{10ce0}', 68768), + ('\u{10ce1}', 68769), ('\u{10ce2}', 68770), ('\u{10ce3}', 68771), ('\u{10ce4}', 68772), + ('\u{10ce5}', 68773), ('\u{10ce6}', 68774), ('\u{10ce7}', 68775), ('\u{10ce8}', 68776), + ('\u{10ce9}', 68777), ('\u{10cea}', 68778), ('\u{10ceb}', 68779), ('\u{10cec}', 68780), + ('\u{10ced}', 68781), ('\u{10cee}', 68782), ('\u{10cef}', 68783), ('\u{10cf0}', 68784), + ('\u{10cf1}', 68785), ('\u{10cf2}', 68786), ('\u{10d70}', 68944), ('\u{10d71}', 68945), + ('\u{10d72}', 68946), ('\u{10d73}', 68947), ('\u{10d74}', 68948), ('\u{10d75}', 68949), + ('\u{10d76}', 68950), ('\u{10d77}', 68951), 
('\u{10d78}', 68952), ('\u{10d79}', 68953), + ('\u{10d7a}', 68954), ('\u{10d7b}', 68955), ('\u{10d7c}', 68956), ('\u{10d7d}', 68957), + ('\u{10d7e}', 68958), ('\u{10d7f}', 68959), ('\u{10d80}', 68960), ('\u{10d81}', 68961), + ('\u{10d82}', 68962), ('\u{10d83}', 68963), ('\u{10d84}', 68964), ('\u{10d85}', 68965), + ('\u{118c0}', 71840), ('\u{118c1}', 71841), ('\u{118c2}', 71842), ('\u{118c3}', 71843), + ('\u{118c4}', 71844), ('\u{118c5}', 71845), ('\u{118c6}', 71846), ('\u{118c7}', 71847), + ('\u{118c8}', 71848), ('\u{118c9}', 71849), ('\u{118ca}', 71850), ('\u{118cb}', 71851), + ('\u{118cc}', 71852), ('\u{118cd}', 71853), ('\u{118ce}', 71854), ('\u{118cf}', 71855), + ('\u{118d0}', 71856), ('\u{118d1}', 71857), ('\u{118d2}', 71858), ('\u{118d3}', 71859), + ('\u{118d4}', 71860), ('\u{118d5}', 71861), ('\u{118d6}', 71862), ('\u{118d7}', 71863), + ('\u{118d8}', 71864), ('\u{118d9}', 71865), ('\u{118da}', 71866), ('\u{118db}', 71867), + ('\u{118dc}', 71868), ('\u{118dd}', 71869), ('\u{118de}', 71870), ('\u{118df}', 71871), + ('\u{16e60}', 93760), ('\u{16e61}', 93761), ('\u{16e62}', 93762), ('\u{16e63}', 93763), + ('\u{16e64}', 93764), ('\u{16e65}', 93765), ('\u{16e66}', 93766), ('\u{16e67}', 93767), + ('\u{16e68}', 93768), ('\u{16e69}', 93769), ('\u{16e6a}', 93770), ('\u{16e6b}', 93771), + ('\u{16e6c}', 93772), ('\u{16e6d}', 93773), ('\u{16e6e}', 93774), ('\u{16e6f}', 93775), + ('\u{16e70}', 93776), ('\u{16e71}', 93777), ('\u{16e72}', 93778), ('\u{16e73}', 93779), + ('\u{16e74}', 93780), ('\u{16e75}', 93781), ('\u{16e76}', 93782), ('\u{16e77}', 93783), + ('\u{16e78}', 93784), ('\u{16e79}', 93785), ('\u{16e7a}', 93786), ('\u{16e7b}', 93787), + ('\u{16e7c}', 93788), ('\u{16e7d}', 93789), ('\u{16e7e}', 93790), ('\u{16e7f}', 93791), + ('\u{16ebb}', 93856), ('\u{16ebc}', 93857), ('\u{16ebd}', 93858), ('\u{16ebe}', 93859), + ('\u{16ebf}', 93860), ('\u{16ec0}', 93861), ('\u{16ec1}', 93862), ('\u{16ec2}', 93863), + ('\u{16ec3}', 93864), ('\u{16ec4}', 93865), ('\u{16ec5}', 93866), ('\u{16ec6}', 93867), + ('\u{16ec7}', 93868), ('\u{16ec8}', 93869), ('\u{16ec9}', 93870), ('\u{16eca}', 93871), + ('\u{16ecb}', 93872), ('\u{16ecc}', 93873), ('\u{16ecd}', 93874), ('\u{16ece}', 93875), + ('\u{16ecf}', 93876), ('\u{16ed0}', 93877), ('\u{16ed1}', 93878), ('\u{16ed2}', 93879), + ('\u{16ed3}', 93880), ('\u{1e922}', 125184), ('\u{1e923}', 125185), ('\u{1e924}', 125186), + ('\u{1e925}', 125187), ('\u{1e926}', 125188), ('\u{1e927}', 125189), ('\u{1e928}', 125190), + ('\u{1e929}', 125191), ('\u{1e92a}', 125192), ('\u{1e92b}', 125193), ('\u{1e92c}', 125194), + ('\u{1e92d}', 125195), ('\u{1e92e}', 125196), ('\u{1e92f}', 125197), ('\u{1e930}', 125198), + ('\u{1e931}', 125199), ('\u{1e932}', 125200), ('\u{1e933}', 125201), ('\u{1e934}', 125202), + ('\u{1e935}', 125203), ('\u{1e936}', 125204), ('\u{1e937}', 125205), ('\u{1e938}', 125206), + ('\u{1e939}', 125207), ('\u{1e93a}', 125208), ('\u{1e93b}', 125209), ('\u{1e93c}', 125210), + ('\u{1e93d}', 125211), ('\u{1e93e}', 125212), ('\u{1e93f}', 125213), ('\u{1e940}', 125214), + ('\u{1e941}', 125215), ('\u{1e942}', 125216), ('\u{1e943}', 125217), ]; - #[rustfmt::skip] static UPPERCASE_TABLE_MULTI: &[[char; 3]; 102] = &[ - ['\u{53}', '\u{53}', '\u{0}'], ['\u{2bc}', '\u{4e}', '\u{0}'], - ['\u{4a}', '\u{30c}', '\u{0}'], ['\u{399}', '\u{308}', '\u{301}'], - ['\u{3a5}', '\u{308}', '\u{301}'], ['\u{535}', '\u{552}', '\u{0}'], - ['\u{48}', '\u{331}', '\u{0}'], ['\u{54}', '\u{308}', '\u{0}'], - ['\u{57}', '\u{30a}', '\u{0}'], ['\u{59}', '\u{30a}', '\u{0}'], - ['\u{41}', '\u{2be}', 
'\u{0}'], ['\u{3a5}', '\u{313}', '\u{0}'], - ['\u{3a5}', '\u{313}', '\u{300}'], ['\u{3a5}', '\u{313}', '\u{301}'], - ['\u{3a5}', '\u{313}', '\u{342}'], ['\u{1f08}', '\u{399}', '\u{0}'], - ['\u{1f09}', '\u{399}', '\u{0}'], ['\u{1f0a}', '\u{399}', '\u{0}'], - ['\u{1f0b}', '\u{399}', '\u{0}'], ['\u{1f0c}', '\u{399}', '\u{0}'], - ['\u{1f0d}', '\u{399}', '\u{0}'], ['\u{1f0e}', '\u{399}', '\u{0}'], - ['\u{1f0f}', '\u{399}', '\u{0}'], ['\u{1f08}', '\u{399}', '\u{0}'], - ['\u{1f09}', '\u{399}', '\u{0}'], ['\u{1f0a}', '\u{399}', '\u{0}'], - ['\u{1f0b}', '\u{399}', '\u{0}'], ['\u{1f0c}', '\u{399}', '\u{0}'], - ['\u{1f0d}', '\u{399}', '\u{0}'], ['\u{1f0e}', '\u{399}', '\u{0}'], - ['\u{1f0f}', '\u{399}', '\u{0}'], ['\u{1f28}', '\u{399}', '\u{0}'], - ['\u{1f29}', '\u{399}', '\u{0}'], ['\u{1f2a}', '\u{399}', '\u{0}'], - ['\u{1f2b}', '\u{399}', '\u{0}'], ['\u{1f2c}', '\u{399}', '\u{0}'], - ['\u{1f2d}', '\u{399}', '\u{0}'], ['\u{1f2e}', '\u{399}', '\u{0}'], - ['\u{1f2f}', '\u{399}', '\u{0}'], ['\u{1f28}', '\u{399}', '\u{0}'], - ['\u{1f29}', '\u{399}', '\u{0}'], ['\u{1f2a}', '\u{399}', '\u{0}'], - ['\u{1f2b}', '\u{399}', '\u{0}'], ['\u{1f2c}', '\u{399}', '\u{0}'], - ['\u{1f2d}', '\u{399}', '\u{0}'], ['\u{1f2e}', '\u{399}', '\u{0}'], - ['\u{1f2f}', '\u{399}', '\u{0}'], ['\u{1f68}', '\u{399}', '\u{0}'], - ['\u{1f69}', '\u{399}', '\u{0}'], ['\u{1f6a}', '\u{399}', '\u{0}'], - ['\u{1f6b}', '\u{399}', '\u{0}'], ['\u{1f6c}', '\u{399}', '\u{0}'], - ['\u{1f6d}', '\u{399}', '\u{0}'], ['\u{1f6e}', '\u{399}', '\u{0}'], - ['\u{1f6f}', '\u{399}', '\u{0}'], ['\u{1f68}', '\u{399}', '\u{0}'], - ['\u{1f69}', '\u{399}', '\u{0}'], ['\u{1f6a}', '\u{399}', '\u{0}'], - ['\u{1f6b}', '\u{399}', '\u{0}'], ['\u{1f6c}', '\u{399}', '\u{0}'], - ['\u{1f6d}', '\u{399}', '\u{0}'], ['\u{1f6e}', '\u{399}', '\u{0}'], - ['\u{1f6f}', '\u{399}', '\u{0}'], ['\u{1fba}', '\u{399}', '\u{0}'], - ['\u{391}', '\u{399}', '\u{0}'], ['\u{386}', '\u{399}', '\u{0}'], - ['\u{391}', '\u{342}', '\u{0}'], ['\u{391}', '\u{342}', '\u{399}'], - ['\u{391}', '\u{399}', '\u{0}'], ['\u{1fca}', '\u{399}', '\u{0}'], - ['\u{397}', '\u{399}', '\u{0}'], ['\u{389}', '\u{399}', '\u{0}'], - ['\u{397}', '\u{342}', '\u{0}'], ['\u{397}', '\u{342}', '\u{399}'], - ['\u{397}', '\u{399}', '\u{0}'], ['\u{399}', '\u{308}', '\u{300}'], - ['\u{399}', '\u{308}', '\u{301}'], ['\u{399}', '\u{342}', '\u{0}'], - ['\u{399}', '\u{308}', '\u{342}'], ['\u{3a5}', '\u{308}', '\u{300}'], - ['\u{3a5}', '\u{308}', '\u{301}'], ['\u{3a1}', '\u{313}', '\u{0}'], - ['\u{3a5}', '\u{342}', '\u{0}'], ['\u{3a5}', '\u{308}', '\u{342}'], - ['\u{1ffa}', '\u{399}', '\u{0}'], ['\u{3a9}', '\u{399}', '\u{0}'], - ['\u{38f}', '\u{399}', '\u{0}'], ['\u{3a9}', '\u{342}', '\u{0}'], - ['\u{3a9}', '\u{342}', '\u{399}'], ['\u{3a9}', '\u{399}', '\u{0}'], - ['\u{46}', '\u{46}', '\u{0}'], ['\u{46}', '\u{49}', '\u{0}'], ['\u{46}', '\u{4c}', '\u{0}'], - ['\u{46}', '\u{46}', '\u{49}'], ['\u{46}', '\u{46}', '\u{4c}'], - ['\u{53}', '\u{54}', '\u{0}'], ['\u{53}', '\u{54}', '\u{0}'], - ['\u{544}', '\u{546}', '\u{0}'], ['\u{544}', '\u{535}', '\u{0}'], + ['S', 'S', '\u{0}'], ['\u{2bc}', 'N', '\u{0}'], ['J', '\u{30c}', '\u{0}'], + ['\u{399}', '\u{308}', '\u{301}'], ['\u{3a5}', '\u{308}', '\u{301}'], + ['\u{535}', '\u{552}', '\u{0}'], ['H', '\u{331}', '\u{0}'], ['T', '\u{308}', '\u{0}'], + ['W', '\u{30a}', '\u{0}'], ['Y', '\u{30a}', '\u{0}'], ['A', '\u{2be}', '\u{0}'], + ['\u{3a5}', '\u{313}', '\u{0}'], ['\u{3a5}', '\u{313}', '\u{300}'], + ['\u{3a5}', '\u{313}', '\u{301}'], ['\u{3a5}', '\u{313}', '\u{342}'], + ['\u{1f08}', '\u{399}', 
'\u{0}'], ['\u{1f09}', '\u{399}', '\u{0}'], + ['\u{1f0a}', '\u{399}', '\u{0}'], ['\u{1f0b}', '\u{399}', '\u{0}'], + ['\u{1f0c}', '\u{399}', '\u{0}'], ['\u{1f0d}', '\u{399}', '\u{0}'], + ['\u{1f0e}', '\u{399}', '\u{0}'], ['\u{1f0f}', '\u{399}', '\u{0}'], + ['\u{1f08}', '\u{399}', '\u{0}'], ['\u{1f09}', '\u{399}', '\u{0}'], + ['\u{1f0a}', '\u{399}', '\u{0}'], ['\u{1f0b}', '\u{399}', '\u{0}'], + ['\u{1f0c}', '\u{399}', '\u{0}'], ['\u{1f0d}', '\u{399}', '\u{0}'], + ['\u{1f0e}', '\u{399}', '\u{0}'], ['\u{1f0f}', '\u{399}', '\u{0}'], + ['\u{1f28}', '\u{399}', '\u{0}'], ['\u{1f29}', '\u{399}', '\u{0}'], + ['\u{1f2a}', '\u{399}', '\u{0}'], ['\u{1f2b}', '\u{399}', '\u{0}'], + ['\u{1f2c}', '\u{399}', '\u{0}'], ['\u{1f2d}', '\u{399}', '\u{0}'], + ['\u{1f2e}', '\u{399}', '\u{0}'], ['\u{1f2f}', '\u{399}', '\u{0}'], + ['\u{1f28}', '\u{399}', '\u{0}'], ['\u{1f29}', '\u{399}', '\u{0}'], + ['\u{1f2a}', '\u{399}', '\u{0}'], ['\u{1f2b}', '\u{399}', '\u{0}'], + ['\u{1f2c}', '\u{399}', '\u{0}'], ['\u{1f2d}', '\u{399}', '\u{0}'], + ['\u{1f2e}', '\u{399}', '\u{0}'], ['\u{1f2f}', '\u{399}', '\u{0}'], + ['\u{1f68}', '\u{399}', '\u{0}'], ['\u{1f69}', '\u{399}', '\u{0}'], + ['\u{1f6a}', '\u{399}', '\u{0}'], ['\u{1f6b}', '\u{399}', '\u{0}'], + ['\u{1f6c}', '\u{399}', '\u{0}'], ['\u{1f6d}', '\u{399}', '\u{0}'], + ['\u{1f6e}', '\u{399}', '\u{0}'], ['\u{1f6f}', '\u{399}', '\u{0}'], + ['\u{1f68}', '\u{399}', '\u{0}'], ['\u{1f69}', '\u{399}', '\u{0}'], + ['\u{1f6a}', '\u{399}', '\u{0}'], ['\u{1f6b}', '\u{399}', '\u{0}'], + ['\u{1f6c}', '\u{399}', '\u{0}'], ['\u{1f6d}', '\u{399}', '\u{0}'], + ['\u{1f6e}', '\u{399}', '\u{0}'], ['\u{1f6f}', '\u{399}', '\u{0}'], + ['\u{1fba}', '\u{399}', '\u{0}'], ['\u{391}', '\u{399}', '\u{0}'], + ['\u{386}', '\u{399}', '\u{0}'], ['\u{391}', '\u{342}', '\u{0}'], + ['\u{391}', '\u{342}', '\u{399}'], ['\u{391}', '\u{399}', '\u{0}'], + ['\u{1fca}', '\u{399}', '\u{0}'], ['\u{397}', '\u{399}', '\u{0}'], + ['\u{389}', '\u{399}', '\u{0}'], ['\u{397}', '\u{342}', '\u{0}'], + ['\u{397}', '\u{342}', '\u{399}'], ['\u{397}', '\u{399}', '\u{0}'], + ['\u{399}', '\u{308}', '\u{300}'], ['\u{399}', '\u{308}', '\u{301}'], + ['\u{399}', '\u{342}', '\u{0}'], ['\u{399}', '\u{308}', '\u{342}'], + ['\u{3a5}', '\u{308}', '\u{300}'], ['\u{3a5}', '\u{308}', '\u{301}'], + ['\u{3a1}', '\u{313}', '\u{0}'], ['\u{3a5}', '\u{342}', '\u{0}'], + ['\u{3a5}', '\u{308}', '\u{342}'], ['\u{1ffa}', '\u{399}', '\u{0}'], + ['\u{3a9}', '\u{399}', '\u{0}'], ['\u{38f}', '\u{399}', '\u{0}'], + ['\u{3a9}', '\u{342}', '\u{0}'], ['\u{3a9}', '\u{342}', '\u{399}'], + ['\u{3a9}', '\u{399}', '\u{0}'], ['F', 'F', '\u{0}'], ['F', 'I', '\u{0}'], + ['F', 'L', '\u{0}'], ['F', 'F', 'I'], ['F', 'F', 'L'], ['S', 'T', '\u{0}'], + ['S', 'T', '\u{0}'], ['\u{544}', '\u{546}', '\u{0}'], ['\u{544}', '\u{535}', '\u{0}'], ['\u{544}', '\u{53b}', '\u{0}'], ['\u{54e}', '\u{546}', '\u{0}'], ['\u{544}', '\u{53d}', '\u{0}'], ]; - - #[inline] - pub fn to_upper(c: char) -> [char; 3] { - const { - let mut i = 0; - while i < UPPERCASE_TABLE.len() { - let (_, val) = UPPERCASE_TABLE[i]; - if val & (1 << 22) == 0 { - assert!(char::from_u32(val).is_some()); - } else { - let index = val & ((1 << 22) - 1); - assert!((index as usize) < UPPERCASE_TABLE_MULTI.len()); - } - i += 1; - } - } - - // SAFETY: Just checked that the tables are valid - unsafe { - super::case_conversion( - c, - |c| c.to_ascii_uppercase(), - UPPERCASE_TABLE, - UPPERCASE_TABLE_MULTI, - ) - } - } } diff --git a/coretests/tests/lib.rs b/coretests/tests/lib.rs index 60a09986eb0a4..80b62038c40ec 100644 --- 
a/coretests/tests/lib.rs +++ b/coretests/tests/lib.rs @@ -116,7 +116,6 @@ #![feature(try_find)] #![feature(try_trait_v2)] #![feature(uint_bit_width)] -#![feature(unicode_internals)] #![feature(unsize)] #![feature(unwrap_infallible)] // tidy-alphabetical-end diff --git a/coretests/tests/unicode.rs b/coretests/tests/unicode.rs index 445175c685784..bbace0ef66ca3 100644 --- a/coretests/tests/unicode.rs +++ b/coretests/tests/unicode.rs @@ -1,101 +1,5 @@ -use core::unicode::unicode_data; -use std::ops::RangeInclusive; - -mod test_data; - #[test] pub fn version() { let (major, _minor, _update) = core::char::UNICODE_VERSION; assert!(major >= 10); } - -#[track_caller] -fn test_boolean_property(ranges: &[RangeInclusive], lookup: fn(char) -> bool) { - let mut start = '\u{80}'; - for range in ranges { - for c in start..*range.start() { - assert!(!lookup(c), "{c:?}"); - } - for c in range.clone() { - assert!(lookup(c), "{c:?}"); - } - start = char::from_u32(*range.end() as u32 + 1).unwrap(); - } - for c in start..=char::MAX { - assert!(!lookup(c), "{c:?}"); - } -} - -#[track_caller] -fn test_case_mapping(ranges: &[(char, [char; 3])], lookup: fn(char) -> [char; 3]) { - let mut start = '\u{80}'; - for &(key, val) in ranges { - for c in start..key { - assert_eq!(lookup(c), [c, '\0', '\0'], "{c:?}"); - } - assert_eq!(lookup(key), val, "{key:?}"); - start = char::from_u32(key as u32 + 1).unwrap(); - } - for c in start..=char::MAX { - assert_eq!(lookup(c), [c, '\0', '\0'], "{c:?}"); - } -} - -#[test] -#[cfg_attr(miri, ignore)] -fn alphabetic() { - test_boolean_property(test_data::ALPHABETIC, unicode_data::alphabetic::lookup); -} - -#[test] -#[cfg_attr(miri, ignore)] -fn case_ignorable() { - test_boolean_property(test_data::CASE_IGNORABLE, unicode_data::case_ignorable::lookup); -} - -#[test] -#[cfg_attr(miri, ignore)] -fn cased() { - test_boolean_property(test_data::CASED, unicode_data::cased::lookup); -} - -#[test] -#[cfg_attr(miri, ignore)] -fn grapheme_extend() { - test_boolean_property(test_data::GRAPHEME_EXTEND, unicode_data::grapheme_extend::lookup); -} - -#[test] -#[cfg_attr(miri, ignore)] -fn lowercase() { - test_boolean_property(test_data::LOWERCASE, unicode_data::lowercase::lookup); -} - -#[test] -fn n() { - test_boolean_property(test_data::N, unicode_data::n::lookup); -} - -#[test] -#[cfg_attr(miri, ignore)] -fn uppercase() { - test_boolean_property(test_data::UPPERCASE, unicode_data::uppercase::lookup); -} - -#[test] -#[cfg_attr(miri, ignore)] -fn white_space() { - test_boolean_property(test_data::WHITE_SPACE, unicode_data::white_space::lookup); -} - -#[test] -#[cfg_attr(miri, ignore)] -fn to_lowercase() { - test_case_mapping(test_data::TO_LOWER, unicode_data::conversions::to_lower); -} - -#[test] -#[cfg_attr(miri, ignore)] -fn to_uppercase() { - test_case_mapping(test_data::TO_UPPER, unicode_data::conversions::to_upper); -} diff --git a/coretests/tests/unicode/test_data.rs b/coretests/tests/unicode/test_data.rs deleted file mode 100644 index f53cd7dc22705..0000000000000 --- a/coretests/tests/unicode/test_data.rs +++ /dev/null @@ -1,2928 +0,0 @@ -//! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually! 
-// ignore-tidy-filelength - -use std::ops::RangeInclusive; - -#[rustfmt::skip] -pub(super) static ALPHABETIC: &[RangeInclusive; 759] = &[ - '\u{aa}'..='\u{aa}', '\u{b5}'..='\u{b5}', '\u{ba}'..='\u{ba}', '\u{c0}'..='\u{d6}', - '\u{d8}'..='\u{f6}', '\u{f8}'..='\u{2c1}', '\u{2c6}'..='\u{2d1}', '\u{2e0}'..='\u{2e4}', - '\u{2ec}'..='\u{2ec}', '\u{2ee}'..='\u{2ee}', '\u{345}'..='\u{345}', '\u{363}'..='\u{374}', - '\u{376}'..='\u{377}', '\u{37a}'..='\u{37d}', '\u{37f}'..='\u{37f}', '\u{386}'..='\u{386}', - '\u{388}'..='\u{38a}', '\u{38c}'..='\u{38c}', '\u{38e}'..='\u{3a1}', '\u{3a3}'..='\u{3f5}', - '\u{3f7}'..='\u{481}', '\u{48a}'..='\u{52f}', '\u{531}'..='\u{556}', '\u{559}'..='\u{559}', - '\u{560}'..='\u{588}', '\u{5b0}'..='\u{5bd}', '\u{5bf}'..='\u{5bf}', '\u{5c1}'..='\u{5c2}', - '\u{5c4}'..='\u{5c5}', '\u{5c7}'..='\u{5c7}', '\u{5d0}'..='\u{5ea}', '\u{5ef}'..='\u{5f2}', - '\u{610}'..='\u{61a}', '\u{620}'..='\u{657}', '\u{659}'..='\u{65f}', '\u{66e}'..='\u{6d3}', - '\u{6d5}'..='\u{6dc}', '\u{6e1}'..='\u{6e8}', '\u{6ed}'..='\u{6ef}', '\u{6fa}'..='\u{6fc}', - '\u{6ff}'..='\u{6ff}', '\u{710}'..='\u{73f}', '\u{74d}'..='\u{7b1}', '\u{7ca}'..='\u{7ea}', - '\u{7f4}'..='\u{7f5}', '\u{7fa}'..='\u{7fa}', '\u{800}'..='\u{817}', '\u{81a}'..='\u{82c}', - '\u{840}'..='\u{858}', '\u{860}'..='\u{86a}', '\u{870}'..='\u{887}', '\u{889}'..='\u{88f}', - '\u{897}'..='\u{897}', '\u{8a0}'..='\u{8c9}', '\u{8d4}'..='\u{8df}', '\u{8e3}'..='\u{8e9}', - '\u{8f0}'..='\u{93b}', '\u{93d}'..='\u{94c}', '\u{94e}'..='\u{950}', '\u{955}'..='\u{963}', - '\u{971}'..='\u{983}', '\u{985}'..='\u{98c}', '\u{98f}'..='\u{990}', '\u{993}'..='\u{9a8}', - '\u{9aa}'..='\u{9b0}', '\u{9b2}'..='\u{9b2}', '\u{9b6}'..='\u{9b9}', '\u{9bd}'..='\u{9c4}', - '\u{9c7}'..='\u{9c8}', '\u{9cb}'..='\u{9cc}', '\u{9ce}'..='\u{9ce}', '\u{9d7}'..='\u{9d7}', - '\u{9dc}'..='\u{9dd}', '\u{9df}'..='\u{9e3}', '\u{9f0}'..='\u{9f1}', '\u{9fc}'..='\u{9fc}', - '\u{a01}'..='\u{a03}', '\u{a05}'..='\u{a0a}', '\u{a0f}'..='\u{a10}', '\u{a13}'..='\u{a28}', - '\u{a2a}'..='\u{a30}', '\u{a32}'..='\u{a33}', '\u{a35}'..='\u{a36}', '\u{a38}'..='\u{a39}', - '\u{a3e}'..='\u{a42}', '\u{a47}'..='\u{a48}', '\u{a4b}'..='\u{a4c}', '\u{a51}'..='\u{a51}', - '\u{a59}'..='\u{a5c}', '\u{a5e}'..='\u{a5e}', '\u{a70}'..='\u{a75}', '\u{a81}'..='\u{a83}', - '\u{a85}'..='\u{a8d}', '\u{a8f}'..='\u{a91}', '\u{a93}'..='\u{aa8}', '\u{aaa}'..='\u{ab0}', - '\u{ab2}'..='\u{ab3}', '\u{ab5}'..='\u{ab9}', '\u{abd}'..='\u{ac5}', '\u{ac7}'..='\u{ac9}', - '\u{acb}'..='\u{acc}', '\u{ad0}'..='\u{ad0}', '\u{ae0}'..='\u{ae3}', '\u{af9}'..='\u{afc}', - '\u{b01}'..='\u{b03}', '\u{b05}'..='\u{b0c}', '\u{b0f}'..='\u{b10}', '\u{b13}'..='\u{b28}', - '\u{b2a}'..='\u{b30}', '\u{b32}'..='\u{b33}', '\u{b35}'..='\u{b39}', '\u{b3d}'..='\u{b44}', - '\u{b47}'..='\u{b48}', '\u{b4b}'..='\u{b4c}', '\u{b56}'..='\u{b57}', '\u{b5c}'..='\u{b5d}', - '\u{b5f}'..='\u{b63}', '\u{b71}'..='\u{b71}', '\u{b82}'..='\u{b83}', '\u{b85}'..='\u{b8a}', - '\u{b8e}'..='\u{b90}', '\u{b92}'..='\u{b95}', '\u{b99}'..='\u{b9a}', '\u{b9c}'..='\u{b9c}', - '\u{b9e}'..='\u{b9f}', '\u{ba3}'..='\u{ba4}', '\u{ba8}'..='\u{baa}', '\u{bae}'..='\u{bb9}', - '\u{bbe}'..='\u{bc2}', '\u{bc6}'..='\u{bc8}', '\u{bca}'..='\u{bcc}', '\u{bd0}'..='\u{bd0}', - '\u{bd7}'..='\u{bd7}', '\u{c00}'..='\u{c0c}', '\u{c0e}'..='\u{c10}', '\u{c12}'..='\u{c28}', - '\u{c2a}'..='\u{c39}', '\u{c3d}'..='\u{c44}', '\u{c46}'..='\u{c48}', '\u{c4a}'..='\u{c4c}', - '\u{c55}'..='\u{c56}', '\u{c58}'..='\u{c5a}', '\u{c5c}'..='\u{c5d}', '\u{c60}'..='\u{c63}', - '\u{c80}'..='\u{c83}', 
'\u{c85}'..='\u{c8c}', '\u{c8e}'..='\u{c90}', '\u{c92}'..='\u{ca8}', - '\u{caa}'..='\u{cb3}', '\u{cb5}'..='\u{cb9}', '\u{cbd}'..='\u{cc4}', '\u{cc6}'..='\u{cc8}', - '\u{cca}'..='\u{ccc}', '\u{cd5}'..='\u{cd6}', '\u{cdc}'..='\u{cde}', '\u{ce0}'..='\u{ce3}', - '\u{cf1}'..='\u{cf3}', '\u{d00}'..='\u{d0c}', '\u{d0e}'..='\u{d10}', '\u{d12}'..='\u{d3a}', - '\u{d3d}'..='\u{d44}', '\u{d46}'..='\u{d48}', '\u{d4a}'..='\u{d4c}', '\u{d4e}'..='\u{d4e}', - '\u{d54}'..='\u{d57}', '\u{d5f}'..='\u{d63}', '\u{d7a}'..='\u{d7f}', '\u{d81}'..='\u{d83}', - '\u{d85}'..='\u{d96}', '\u{d9a}'..='\u{db1}', '\u{db3}'..='\u{dbb}', '\u{dbd}'..='\u{dbd}', - '\u{dc0}'..='\u{dc6}', '\u{dcf}'..='\u{dd4}', '\u{dd6}'..='\u{dd6}', '\u{dd8}'..='\u{ddf}', - '\u{df2}'..='\u{df3}', '\u{e01}'..='\u{e3a}', '\u{e40}'..='\u{e46}', '\u{e4d}'..='\u{e4d}', - '\u{e81}'..='\u{e82}', '\u{e84}'..='\u{e84}', '\u{e86}'..='\u{e8a}', '\u{e8c}'..='\u{ea3}', - '\u{ea5}'..='\u{ea5}', '\u{ea7}'..='\u{eb9}', '\u{ebb}'..='\u{ebd}', '\u{ec0}'..='\u{ec4}', - '\u{ec6}'..='\u{ec6}', '\u{ecd}'..='\u{ecd}', '\u{edc}'..='\u{edf}', '\u{f00}'..='\u{f00}', - '\u{f40}'..='\u{f47}', '\u{f49}'..='\u{f6c}', '\u{f71}'..='\u{f83}', '\u{f88}'..='\u{f97}', - '\u{f99}'..='\u{fbc}', '\u{1000}'..='\u{1036}', '\u{1038}'..='\u{1038}', - '\u{103b}'..='\u{103f}', '\u{1050}'..='\u{108f}', '\u{109a}'..='\u{109d}', - '\u{10a0}'..='\u{10c5}', '\u{10c7}'..='\u{10c7}', '\u{10cd}'..='\u{10cd}', - '\u{10d0}'..='\u{10fa}', '\u{10fc}'..='\u{1248}', '\u{124a}'..='\u{124d}', - '\u{1250}'..='\u{1256}', '\u{1258}'..='\u{1258}', '\u{125a}'..='\u{125d}', - '\u{1260}'..='\u{1288}', '\u{128a}'..='\u{128d}', '\u{1290}'..='\u{12b0}', - '\u{12b2}'..='\u{12b5}', '\u{12b8}'..='\u{12be}', '\u{12c0}'..='\u{12c0}', - '\u{12c2}'..='\u{12c5}', '\u{12c8}'..='\u{12d6}', '\u{12d8}'..='\u{1310}', - '\u{1312}'..='\u{1315}', '\u{1318}'..='\u{135a}', '\u{1380}'..='\u{138f}', - '\u{13a0}'..='\u{13f5}', '\u{13f8}'..='\u{13fd}', '\u{1401}'..='\u{166c}', - '\u{166f}'..='\u{167f}', '\u{1681}'..='\u{169a}', '\u{16a0}'..='\u{16ea}', - '\u{16ee}'..='\u{16f8}', '\u{1700}'..='\u{1713}', '\u{171f}'..='\u{1733}', - '\u{1740}'..='\u{1753}', '\u{1760}'..='\u{176c}', '\u{176e}'..='\u{1770}', - '\u{1772}'..='\u{1773}', '\u{1780}'..='\u{17b3}', '\u{17b6}'..='\u{17c8}', - '\u{17d7}'..='\u{17d7}', '\u{17dc}'..='\u{17dc}', '\u{1820}'..='\u{1878}', - '\u{1880}'..='\u{18aa}', '\u{18b0}'..='\u{18f5}', '\u{1900}'..='\u{191e}', - '\u{1920}'..='\u{192b}', '\u{1930}'..='\u{1938}', '\u{1950}'..='\u{196d}', - '\u{1970}'..='\u{1974}', '\u{1980}'..='\u{19ab}', '\u{19b0}'..='\u{19c9}', - '\u{1a00}'..='\u{1a1b}', '\u{1a20}'..='\u{1a5e}', '\u{1a61}'..='\u{1a74}', - '\u{1aa7}'..='\u{1aa7}', '\u{1abf}'..='\u{1ac0}', '\u{1acc}'..='\u{1ace}', - '\u{1b00}'..='\u{1b33}', '\u{1b35}'..='\u{1b43}', '\u{1b45}'..='\u{1b4c}', - '\u{1b80}'..='\u{1ba9}', '\u{1bac}'..='\u{1baf}', '\u{1bba}'..='\u{1be5}', - '\u{1be7}'..='\u{1bf1}', '\u{1c00}'..='\u{1c36}', '\u{1c4d}'..='\u{1c4f}', - '\u{1c5a}'..='\u{1c7d}', '\u{1c80}'..='\u{1c8a}', '\u{1c90}'..='\u{1cba}', - '\u{1cbd}'..='\u{1cbf}', '\u{1ce9}'..='\u{1cec}', '\u{1cee}'..='\u{1cf3}', - '\u{1cf5}'..='\u{1cf6}', '\u{1cfa}'..='\u{1cfa}', '\u{1d00}'..='\u{1dbf}', - '\u{1dd3}'..='\u{1df4}', '\u{1e00}'..='\u{1f15}', '\u{1f18}'..='\u{1f1d}', - '\u{1f20}'..='\u{1f45}', '\u{1f48}'..='\u{1f4d}', '\u{1f50}'..='\u{1f57}', - '\u{1f59}'..='\u{1f59}', '\u{1f5b}'..='\u{1f5b}', '\u{1f5d}'..='\u{1f5d}', - '\u{1f5f}'..='\u{1f7d}', '\u{1f80}'..='\u{1fb4}', '\u{1fb6}'..='\u{1fbc}', - '\u{1fbe}'..='\u{1fbe}', 
'\u{1fc2}'..='\u{1fc4}', '\u{1fc6}'..='\u{1fcc}', - '\u{1fd0}'..='\u{1fd3}', '\u{1fd6}'..='\u{1fdb}', '\u{1fe0}'..='\u{1fec}', - '\u{1ff2}'..='\u{1ff4}', '\u{1ff6}'..='\u{1ffc}', '\u{2071}'..='\u{2071}', - '\u{207f}'..='\u{207f}', '\u{2090}'..='\u{209c}', '\u{2102}'..='\u{2102}', - '\u{2107}'..='\u{2107}', '\u{210a}'..='\u{2113}', '\u{2115}'..='\u{2115}', - '\u{2119}'..='\u{211d}', '\u{2124}'..='\u{2124}', '\u{2126}'..='\u{2126}', - '\u{2128}'..='\u{2128}', '\u{212a}'..='\u{212d}', '\u{212f}'..='\u{2139}', - '\u{213c}'..='\u{213f}', '\u{2145}'..='\u{2149}', '\u{214e}'..='\u{214e}', - '\u{2160}'..='\u{2188}', '\u{24b6}'..='\u{24e9}', '\u{2c00}'..='\u{2ce4}', - '\u{2ceb}'..='\u{2cee}', '\u{2cf2}'..='\u{2cf3}', '\u{2d00}'..='\u{2d25}', - '\u{2d27}'..='\u{2d27}', '\u{2d2d}'..='\u{2d2d}', '\u{2d30}'..='\u{2d67}', - '\u{2d6f}'..='\u{2d6f}', '\u{2d80}'..='\u{2d96}', '\u{2da0}'..='\u{2da6}', - '\u{2da8}'..='\u{2dae}', '\u{2db0}'..='\u{2db6}', '\u{2db8}'..='\u{2dbe}', - '\u{2dc0}'..='\u{2dc6}', '\u{2dc8}'..='\u{2dce}', '\u{2dd0}'..='\u{2dd6}', - '\u{2dd8}'..='\u{2dde}', '\u{2de0}'..='\u{2dff}', '\u{2e2f}'..='\u{2e2f}', - '\u{3005}'..='\u{3007}', '\u{3021}'..='\u{3029}', '\u{3031}'..='\u{3035}', - '\u{3038}'..='\u{303c}', '\u{3041}'..='\u{3096}', '\u{309d}'..='\u{309f}', - '\u{30a1}'..='\u{30fa}', '\u{30fc}'..='\u{30ff}', '\u{3105}'..='\u{312f}', - '\u{3131}'..='\u{318e}', '\u{31a0}'..='\u{31bf}', '\u{31f0}'..='\u{31ff}', - '\u{3400}'..='\u{4dbf}', '\u{4e00}'..='\u{a48c}', '\u{a4d0}'..='\u{a4fd}', - '\u{a500}'..='\u{a60c}', '\u{a610}'..='\u{a61f}', '\u{a62a}'..='\u{a62b}', - '\u{a640}'..='\u{a66e}', '\u{a674}'..='\u{a67b}', '\u{a67f}'..='\u{a6ef}', - '\u{a717}'..='\u{a71f}', '\u{a722}'..='\u{a788}', '\u{a78b}'..='\u{a7dc}', - '\u{a7f1}'..='\u{a805}', '\u{a807}'..='\u{a827}', '\u{a840}'..='\u{a873}', - '\u{a880}'..='\u{a8c3}', '\u{a8c5}'..='\u{a8c5}', '\u{a8f2}'..='\u{a8f7}', - '\u{a8fb}'..='\u{a8fb}', '\u{a8fd}'..='\u{a8ff}', '\u{a90a}'..='\u{a92a}', - '\u{a930}'..='\u{a952}', '\u{a960}'..='\u{a97c}', '\u{a980}'..='\u{a9b2}', - '\u{a9b4}'..='\u{a9bf}', '\u{a9cf}'..='\u{a9cf}', '\u{a9e0}'..='\u{a9ef}', - '\u{a9fa}'..='\u{a9fe}', '\u{aa00}'..='\u{aa36}', '\u{aa40}'..='\u{aa4d}', - '\u{aa60}'..='\u{aa76}', '\u{aa7a}'..='\u{aabe}', '\u{aac0}'..='\u{aac0}', - '\u{aac2}'..='\u{aac2}', '\u{aadb}'..='\u{aadd}', '\u{aae0}'..='\u{aaef}', - '\u{aaf2}'..='\u{aaf5}', '\u{ab01}'..='\u{ab06}', '\u{ab09}'..='\u{ab0e}', - '\u{ab11}'..='\u{ab16}', '\u{ab20}'..='\u{ab26}', '\u{ab28}'..='\u{ab2e}', - '\u{ab30}'..='\u{ab5a}', '\u{ab5c}'..='\u{ab69}', '\u{ab70}'..='\u{abea}', - '\u{ac00}'..='\u{d7a3}', '\u{d7b0}'..='\u{d7c6}', '\u{d7cb}'..='\u{d7fb}', - '\u{f900}'..='\u{fa6d}', '\u{fa70}'..='\u{fad9}', '\u{fb00}'..='\u{fb06}', - '\u{fb13}'..='\u{fb17}', '\u{fb1d}'..='\u{fb28}', '\u{fb2a}'..='\u{fb36}', - '\u{fb38}'..='\u{fb3c}', '\u{fb3e}'..='\u{fb3e}', '\u{fb40}'..='\u{fb41}', - '\u{fb43}'..='\u{fb44}', '\u{fb46}'..='\u{fbb1}', '\u{fbd3}'..='\u{fd3d}', - '\u{fd50}'..='\u{fd8f}', '\u{fd92}'..='\u{fdc7}', '\u{fdf0}'..='\u{fdfb}', - '\u{fe70}'..='\u{fe74}', '\u{fe76}'..='\u{fefc}', '\u{ff21}'..='\u{ff3a}', - '\u{ff41}'..='\u{ff5a}', '\u{ff66}'..='\u{ffbe}', '\u{ffc2}'..='\u{ffc7}', - '\u{ffca}'..='\u{ffcf}', '\u{ffd2}'..='\u{ffd7}', '\u{ffda}'..='\u{ffdc}', - '\u{10000}'..='\u{1000b}', '\u{1000d}'..='\u{10026}', '\u{10028}'..='\u{1003a}', - '\u{1003c}'..='\u{1003d}', '\u{1003f}'..='\u{1004d}', '\u{10050}'..='\u{1005d}', - '\u{10080}'..='\u{100fa}', '\u{10140}'..='\u{10174}', '\u{10280}'..='\u{1029c}', - 
'\u{102a0}'..='\u{102d0}', '\u{10300}'..='\u{1031f}', '\u{1032d}'..='\u{1034a}', - '\u{10350}'..='\u{1037a}', '\u{10380}'..='\u{1039d}', '\u{103a0}'..='\u{103c3}', - '\u{103c8}'..='\u{103cf}', '\u{103d1}'..='\u{103d5}', '\u{10400}'..='\u{1049d}', - '\u{104b0}'..='\u{104d3}', '\u{104d8}'..='\u{104fb}', '\u{10500}'..='\u{10527}', - '\u{10530}'..='\u{10563}', '\u{10570}'..='\u{1057a}', '\u{1057c}'..='\u{1058a}', - '\u{1058c}'..='\u{10592}', '\u{10594}'..='\u{10595}', '\u{10597}'..='\u{105a1}', - '\u{105a3}'..='\u{105b1}', '\u{105b3}'..='\u{105b9}', '\u{105bb}'..='\u{105bc}', - '\u{105c0}'..='\u{105f3}', '\u{10600}'..='\u{10736}', '\u{10740}'..='\u{10755}', - '\u{10760}'..='\u{10767}', '\u{10780}'..='\u{10785}', '\u{10787}'..='\u{107b0}', - '\u{107b2}'..='\u{107ba}', '\u{10800}'..='\u{10805}', '\u{10808}'..='\u{10808}', - '\u{1080a}'..='\u{10835}', '\u{10837}'..='\u{10838}', '\u{1083c}'..='\u{1083c}', - '\u{1083f}'..='\u{10855}', '\u{10860}'..='\u{10876}', '\u{10880}'..='\u{1089e}', - '\u{108e0}'..='\u{108f2}', '\u{108f4}'..='\u{108f5}', '\u{10900}'..='\u{10915}', - '\u{10920}'..='\u{10939}', '\u{10940}'..='\u{10959}', '\u{10980}'..='\u{109b7}', - '\u{109be}'..='\u{109bf}', '\u{10a00}'..='\u{10a03}', '\u{10a05}'..='\u{10a06}', - '\u{10a0c}'..='\u{10a13}', '\u{10a15}'..='\u{10a17}', '\u{10a19}'..='\u{10a35}', - '\u{10a60}'..='\u{10a7c}', '\u{10a80}'..='\u{10a9c}', '\u{10ac0}'..='\u{10ac7}', - '\u{10ac9}'..='\u{10ae4}', '\u{10b00}'..='\u{10b35}', '\u{10b40}'..='\u{10b55}', - '\u{10b60}'..='\u{10b72}', '\u{10b80}'..='\u{10b91}', '\u{10c00}'..='\u{10c48}', - '\u{10c80}'..='\u{10cb2}', '\u{10cc0}'..='\u{10cf2}', '\u{10d00}'..='\u{10d27}', - '\u{10d4a}'..='\u{10d65}', '\u{10d69}'..='\u{10d69}', '\u{10d6f}'..='\u{10d85}', - '\u{10e80}'..='\u{10ea9}', '\u{10eab}'..='\u{10eac}', '\u{10eb0}'..='\u{10eb1}', - '\u{10ec2}'..='\u{10ec7}', '\u{10efa}'..='\u{10efc}', '\u{10f00}'..='\u{10f1c}', - '\u{10f27}'..='\u{10f27}', '\u{10f30}'..='\u{10f45}', '\u{10f70}'..='\u{10f81}', - '\u{10fb0}'..='\u{10fc4}', '\u{10fe0}'..='\u{10ff6}', '\u{11000}'..='\u{11045}', - '\u{11071}'..='\u{11075}', '\u{11080}'..='\u{110b8}', '\u{110c2}'..='\u{110c2}', - '\u{110d0}'..='\u{110e8}', '\u{11100}'..='\u{11132}', '\u{11144}'..='\u{11147}', - '\u{11150}'..='\u{11172}', '\u{11176}'..='\u{11176}', '\u{11180}'..='\u{111bf}', - '\u{111c1}'..='\u{111c4}', '\u{111ce}'..='\u{111cf}', '\u{111da}'..='\u{111da}', - '\u{111dc}'..='\u{111dc}', '\u{11200}'..='\u{11211}', '\u{11213}'..='\u{11234}', - '\u{11237}'..='\u{11237}', '\u{1123e}'..='\u{11241}', '\u{11280}'..='\u{11286}', - '\u{11288}'..='\u{11288}', '\u{1128a}'..='\u{1128d}', '\u{1128f}'..='\u{1129d}', - '\u{1129f}'..='\u{112a8}', '\u{112b0}'..='\u{112e8}', '\u{11300}'..='\u{11303}', - '\u{11305}'..='\u{1130c}', '\u{1130f}'..='\u{11310}', '\u{11313}'..='\u{11328}', - '\u{1132a}'..='\u{11330}', '\u{11332}'..='\u{11333}', '\u{11335}'..='\u{11339}', - '\u{1133d}'..='\u{11344}', '\u{11347}'..='\u{11348}', '\u{1134b}'..='\u{1134c}', - '\u{11350}'..='\u{11350}', '\u{11357}'..='\u{11357}', '\u{1135d}'..='\u{11363}', - '\u{11380}'..='\u{11389}', '\u{1138b}'..='\u{1138b}', '\u{1138e}'..='\u{1138e}', - '\u{11390}'..='\u{113b5}', '\u{113b7}'..='\u{113c0}', '\u{113c2}'..='\u{113c2}', - '\u{113c5}'..='\u{113c5}', '\u{113c7}'..='\u{113ca}', '\u{113cc}'..='\u{113cd}', - '\u{113d1}'..='\u{113d1}', '\u{113d3}'..='\u{113d3}', '\u{11400}'..='\u{11441}', - '\u{11443}'..='\u{11445}', '\u{11447}'..='\u{1144a}', '\u{1145f}'..='\u{11461}', - '\u{11480}'..='\u{114c1}', '\u{114c4}'..='\u{114c5}', 
'\u{114c7}'..='\u{114c7}', - '\u{11580}'..='\u{115b5}', '\u{115b8}'..='\u{115be}', '\u{115d8}'..='\u{115dd}', - '\u{11600}'..='\u{1163e}', '\u{11640}'..='\u{11640}', '\u{11644}'..='\u{11644}', - '\u{11680}'..='\u{116b5}', '\u{116b8}'..='\u{116b8}', '\u{11700}'..='\u{1171a}', - '\u{1171d}'..='\u{1172a}', '\u{11740}'..='\u{11746}', '\u{11800}'..='\u{11838}', - '\u{118a0}'..='\u{118df}', '\u{118ff}'..='\u{11906}', '\u{11909}'..='\u{11909}', - '\u{1190c}'..='\u{11913}', '\u{11915}'..='\u{11916}', '\u{11918}'..='\u{11935}', - '\u{11937}'..='\u{11938}', '\u{1193b}'..='\u{1193c}', '\u{1193f}'..='\u{11942}', - '\u{119a0}'..='\u{119a7}', '\u{119aa}'..='\u{119d7}', '\u{119da}'..='\u{119df}', - '\u{119e1}'..='\u{119e1}', '\u{119e3}'..='\u{119e4}', '\u{11a00}'..='\u{11a32}', - '\u{11a35}'..='\u{11a3e}', '\u{11a50}'..='\u{11a97}', '\u{11a9d}'..='\u{11a9d}', - '\u{11ab0}'..='\u{11af8}', '\u{11b60}'..='\u{11b67}', '\u{11bc0}'..='\u{11be0}', - '\u{11c00}'..='\u{11c08}', '\u{11c0a}'..='\u{11c36}', '\u{11c38}'..='\u{11c3e}', - '\u{11c40}'..='\u{11c40}', '\u{11c72}'..='\u{11c8f}', '\u{11c92}'..='\u{11ca7}', - '\u{11ca9}'..='\u{11cb6}', '\u{11d00}'..='\u{11d06}', '\u{11d08}'..='\u{11d09}', - '\u{11d0b}'..='\u{11d36}', '\u{11d3a}'..='\u{11d3a}', '\u{11d3c}'..='\u{11d3d}', - '\u{11d3f}'..='\u{11d41}', '\u{11d43}'..='\u{11d43}', '\u{11d46}'..='\u{11d47}', - '\u{11d60}'..='\u{11d65}', '\u{11d67}'..='\u{11d68}', '\u{11d6a}'..='\u{11d8e}', - '\u{11d90}'..='\u{11d91}', '\u{11d93}'..='\u{11d96}', '\u{11d98}'..='\u{11d98}', - '\u{11db0}'..='\u{11ddb}', '\u{11ee0}'..='\u{11ef6}', '\u{11f00}'..='\u{11f10}', - '\u{11f12}'..='\u{11f3a}', '\u{11f3e}'..='\u{11f40}', '\u{11fb0}'..='\u{11fb0}', - '\u{12000}'..='\u{12399}', '\u{12400}'..='\u{1246e}', '\u{12480}'..='\u{12543}', - '\u{12f90}'..='\u{12ff0}', '\u{13000}'..='\u{1342f}', '\u{13441}'..='\u{13446}', - '\u{13460}'..='\u{143fa}', '\u{14400}'..='\u{14646}', '\u{16100}'..='\u{1612e}', - '\u{16800}'..='\u{16a38}', '\u{16a40}'..='\u{16a5e}', '\u{16a70}'..='\u{16abe}', - '\u{16ad0}'..='\u{16aed}', '\u{16b00}'..='\u{16b2f}', '\u{16b40}'..='\u{16b43}', - '\u{16b63}'..='\u{16b77}', '\u{16b7d}'..='\u{16b8f}', '\u{16d40}'..='\u{16d6c}', - '\u{16e40}'..='\u{16e7f}', '\u{16ea0}'..='\u{16eb8}', '\u{16ebb}'..='\u{16ed3}', - '\u{16f00}'..='\u{16f4a}', '\u{16f4f}'..='\u{16f87}', '\u{16f8f}'..='\u{16f9f}', - '\u{16fe0}'..='\u{16fe1}', '\u{16fe3}'..='\u{16fe3}', '\u{16ff0}'..='\u{16ff6}', - '\u{17000}'..='\u{18cd5}', '\u{18cff}'..='\u{18d1e}', '\u{18d80}'..='\u{18df2}', - '\u{1aff0}'..='\u{1aff3}', '\u{1aff5}'..='\u{1affb}', '\u{1affd}'..='\u{1affe}', - '\u{1b000}'..='\u{1b122}', '\u{1b132}'..='\u{1b132}', '\u{1b150}'..='\u{1b152}', - '\u{1b155}'..='\u{1b155}', '\u{1b164}'..='\u{1b167}', '\u{1b170}'..='\u{1b2fb}', - '\u{1bc00}'..='\u{1bc6a}', '\u{1bc70}'..='\u{1bc7c}', '\u{1bc80}'..='\u{1bc88}', - '\u{1bc90}'..='\u{1bc99}', '\u{1bc9e}'..='\u{1bc9e}', '\u{1d400}'..='\u{1d454}', - '\u{1d456}'..='\u{1d49c}', '\u{1d49e}'..='\u{1d49f}', '\u{1d4a2}'..='\u{1d4a2}', - '\u{1d4a5}'..='\u{1d4a6}', '\u{1d4a9}'..='\u{1d4ac}', '\u{1d4ae}'..='\u{1d4b9}', - '\u{1d4bb}'..='\u{1d4bb}', '\u{1d4bd}'..='\u{1d4c3}', '\u{1d4c5}'..='\u{1d505}', - '\u{1d507}'..='\u{1d50a}', '\u{1d50d}'..='\u{1d514}', '\u{1d516}'..='\u{1d51c}', - '\u{1d51e}'..='\u{1d539}', '\u{1d53b}'..='\u{1d53e}', '\u{1d540}'..='\u{1d544}', - '\u{1d546}'..='\u{1d546}', '\u{1d54a}'..='\u{1d550}', '\u{1d552}'..='\u{1d6a5}', - '\u{1d6a8}'..='\u{1d6c0}', '\u{1d6c2}'..='\u{1d6da}', '\u{1d6dc}'..='\u{1d6fa}', - '\u{1d6fc}'..='\u{1d714}', 
'\u{1d716}'..='\u{1d734}', '\u{1d736}'..='\u{1d74e}',
-    '\u{1d750}'..='\u{1d76e}', '\u{1d770}'..='\u{1d788}', '\u{1d78a}'..='\u{1d7a8}',
-    '\u{1d7aa}'..='\u{1d7c2}', '\u{1d7c4}'..='\u{1d7cb}', '\u{1df00}'..='\u{1df1e}',
-    '\u{1df25}'..='\u{1df2a}', '\u{1e000}'..='\u{1e006}', '\u{1e008}'..='\u{1e018}',
-    '\u{1e01b}'..='\u{1e021}', '\u{1e023}'..='\u{1e024}', '\u{1e026}'..='\u{1e02a}',
-    '\u{1e030}'..='\u{1e06d}', '\u{1e08f}'..='\u{1e08f}', '\u{1e100}'..='\u{1e12c}',
-    '\u{1e137}'..='\u{1e13d}', '\u{1e14e}'..='\u{1e14e}', '\u{1e290}'..='\u{1e2ad}',
-    '\u{1e2c0}'..='\u{1e2eb}', '\u{1e4d0}'..='\u{1e4eb}', '\u{1e5d0}'..='\u{1e5ed}',
-    '\u{1e5f0}'..='\u{1e5f0}', '\u{1e6c0}'..='\u{1e6de}', '\u{1e6e0}'..='\u{1e6f5}',
-    '\u{1e6fe}'..='\u{1e6ff}', '\u{1e7e0}'..='\u{1e7e6}', '\u{1e7e8}'..='\u{1e7eb}',
-    '\u{1e7ed}'..='\u{1e7ee}', '\u{1e7f0}'..='\u{1e7fe}', '\u{1e800}'..='\u{1e8c4}',
-    '\u{1e900}'..='\u{1e943}', '\u{1e947}'..='\u{1e947}', '\u{1e94b}'..='\u{1e94b}',
-    '\u{1ee00}'..='\u{1ee03}', '\u{1ee05}'..='\u{1ee1f}', '\u{1ee21}'..='\u{1ee22}',
-    '\u{1ee24}'..='\u{1ee24}', '\u{1ee27}'..='\u{1ee27}', '\u{1ee29}'..='\u{1ee32}',
-    '\u{1ee34}'..='\u{1ee37}', '\u{1ee39}'..='\u{1ee39}', '\u{1ee3b}'..='\u{1ee3b}',
-    '\u{1ee42}'..='\u{1ee42}', '\u{1ee47}'..='\u{1ee47}', '\u{1ee49}'..='\u{1ee49}',
-    '\u{1ee4b}'..='\u{1ee4b}', '\u{1ee4d}'..='\u{1ee4f}', '\u{1ee51}'..='\u{1ee52}',
-    '\u{1ee54}'..='\u{1ee54}', '\u{1ee57}'..='\u{1ee57}', '\u{1ee59}'..='\u{1ee59}',
-    '\u{1ee5b}'..='\u{1ee5b}', '\u{1ee5d}'..='\u{1ee5d}', '\u{1ee5f}'..='\u{1ee5f}',
-    '\u{1ee61}'..='\u{1ee62}', '\u{1ee64}'..='\u{1ee64}', '\u{1ee67}'..='\u{1ee6a}',
-    '\u{1ee6c}'..='\u{1ee72}', '\u{1ee74}'..='\u{1ee77}', '\u{1ee79}'..='\u{1ee7c}',
-    '\u{1ee7e}'..='\u{1ee7e}', '\u{1ee80}'..='\u{1ee89}', '\u{1ee8b}'..='\u{1ee9b}',
-    '\u{1eea1}'..='\u{1eea3}', '\u{1eea5}'..='\u{1eea9}', '\u{1eeab}'..='\u{1eebb}',
-    '\u{1f130}'..='\u{1f149}', '\u{1f150}'..='\u{1f169}', '\u{1f170}'..='\u{1f189}',
-    '\u{20000}'..='\u{2a6df}', '\u{2a700}'..='\u{2b81d}', '\u{2b820}'..='\u{2cead}',
-    '\u{2ceb0}'..='\u{2ebe0}', '\u{2ebf0}'..='\u{2ee5d}', '\u{2f800}'..='\u{2fa1d}',
-    '\u{30000}'..='\u{3134a}', '\u{31350}'..='\u{33479}',
-];
-
-#[rustfmt::skip]
-pub(super) static CASE_IGNORABLE: &[RangeInclusive<char>; 459] = &[
-    '\u{a8}'..='\u{a8}', '\u{ad}'..='\u{ad}', '\u{af}'..='\u{af}', '\u{b4}'..='\u{b4}',
-    '\u{b7}'..='\u{b8}', '\u{2b0}'..='\u{36f}', '\u{374}'..='\u{375}', '\u{37a}'..='\u{37a}',
-    '\u{384}'..='\u{385}', '\u{387}'..='\u{387}', '\u{483}'..='\u{489}', '\u{559}'..='\u{559}',
-    '\u{55f}'..='\u{55f}', '\u{591}'..='\u{5bd}', '\u{5bf}'..='\u{5bf}', '\u{5c1}'..='\u{5c2}',
-    '\u{5c4}'..='\u{5c5}', '\u{5c7}'..='\u{5c7}', '\u{5f4}'..='\u{5f4}', '\u{600}'..='\u{605}',
-    '\u{610}'..='\u{61a}', '\u{61c}'..='\u{61c}', '\u{640}'..='\u{640}', '\u{64b}'..='\u{65f}',
-    '\u{670}'..='\u{670}', '\u{6d6}'..='\u{6dd}', '\u{6df}'..='\u{6e8}', '\u{6ea}'..='\u{6ed}',
-    '\u{70f}'..='\u{70f}', '\u{711}'..='\u{711}', '\u{730}'..='\u{74a}', '\u{7a6}'..='\u{7b0}',
-    '\u{7eb}'..='\u{7f5}', '\u{7fa}'..='\u{7fa}', '\u{7fd}'..='\u{7fd}', '\u{816}'..='\u{82d}',
-    '\u{859}'..='\u{85b}', '\u{888}'..='\u{888}', '\u{890}'..='\u{891}', '\u{897}'..='\u{89f}',
-    '\u{8c9}'..='\u{902}', '\u{93a}'..='\u{93a}', '\u{93c}'..='\u{93c}', '\u{941}'..='\u{948}',
-    '\u{94d}'..='\u{94d}', '\u{951}'..='\u{957}', '\u{962}'..='\u{963}', '\u{971}'..='\u{971}',
-    '\u{981}'..='\u{981}', '\u{9bc}'..='\u{9bc}', '\u{9c1}'..='\u{9c4}', '\u{9cd}'..='\u{9cd}',
-    '\u{9e2}'..='\u{9e3}', '\u{9fe}'..='\u{9fe}', '\u{a01}'..='\u{a02}',
'\u{a3c}'..='\u{a3c}', - '\u{a41}'..='\u{a42}', '\u{a47}'..='\u{a48}', '\u{a4b}'..='\u{a4d}', '\u{a51}'..='\u{a51}', - '\u{a70}'..='\u{a71}', '\u{a75}'..='\u{a75}', '\u{a81}'..='\u{a82}', '\u{abc}'..='\u{abc}', - '\u{ac1}'..='\u{ac5}', '\u{ac7}'..='\u{ac8}', '\u{acd}'..='\u{acd}', '\u{ae2}'..='\u{ae3}', - '\u{afa}'..='\u{aff}', '\u{b01}'..='\u{b01}', '\u{b3c}'..='\u{b3c}', '\u{b3f}'..='\u{b3f}', - '\u{b41}'..='\u{b44}', '\u{b4d}'..='\u{b4d}', '\u{b55}'..='\u{b56}', '\u{b62}'..='\u{b63}', - '\u{b82}'..='\u{b82}', '\u{bc0}'..='\u{bc0}', '\u{bcd}'..='\u{bcd}', '\u{c00}'..='\u{c00}', - '\u{c04}'..='\u{c04}', '\u{c3c}'..='\u{c3c}', '\u{c3e}'..='\u{c40}', '\u{c46}'..='\u{c48}', - '\u{c4a}'..='\u{c4d}', '\u{c55}'..='\u{c56}', '\u{c62}'..='\u{c63}', '\u{c81}'..='\u{c81}', - '\u{cbc}'..='\u{cbc}', '\u{cbf}'..='\u{cbf}', '\u{cc6}'..='\u{cc6}', '\u{ccc}'..='\u{ccd}', - '\u{ce2}'..='\u{ce3}', '\u{d00}'..='\u{d01}', '\u{d3b}'..='\u{d3c}', '\u{d41}'..='\u{d44}', - '\u{d4d}'..='\u{d4d}', '\u{d62}'..='\u{d63}', '\u{d81}'..='\u{d81}', '\u{dca}'..='\u{dca}', - '\u{dd2}'..='\u{dd4}', '\u{dd6}'..='\u{dd6}', '\u{e31}'..='\u{e31}', '\u{e34}'..='\u{e3a}', - '\u{e46}'..='\u{e4e}', '\u{eb1}'..='\u{eb1}', '\u{eb4}'..='\u{ebc}', '\u{ec6}'..='\u{ec6}', - '\u{ec8}'..='\u{ece}', '\u{f18}'..='\u{f19}', '\u{f35}'..='\u{f35}', '\u{f37}'..='\u{f37}', - '\u{f39}'..='\u{f39}', '\u{f71}'..='\u{f7e}', '\u{f80}'..='\u{f84}', '\u{f86}'..='\u{f87}', - '\u{f8d}'..='\u{f97}', '\u{f99}'..='\u{fbc}', '\u{fc6}'..='\u{fc6}', - '\u{102d}'..='\u{1030}', '\u{1032}'..='\u{1037}', '\u{1039}'..='\u{103a}', - '\u{103d}'..='\u{103e}', '\u{1058}'..='\u{1059}', '\u{105e}'..='\u{1060}', - '\u{1071}'..='\u{1074}', '\u{1082}'..='\u{1082}', '\u{1085}'..='\u{1086}', - '\u{108d}'..='\u{108d}', '\u{109d}'..='\u{109d}', '\u{10fc}'..='\u{10fc}', - '\u{135d}'..='\u{135f}', '\u{1712}'..='\u{1714}', '\u{1732}'..='\u{1733}', - '\u{1752}'..='\u{1753}', '\u{1772}'..='\u{1773}', '\u{17b4}'..='\u{17b5}', - '\u{17b7}'..='\u{17bd}', '\u{17c6}'..='\u{17c6}', '\u{17c9}'..='\u{17d3}', - '\u{17d7}'..='\u{17d7}', '\u{17dd}'..='\u{17dd}', '\u{180b}'..='\u{180f}', - '\u{1843}'..='\u{1843}', '\u{1885}'..='\u{1886}', '\u{18a9}'..='\u{18a9}', - '\u{1920}'..='\u{1922}', '\u{1927}'..='\u{1928}', '\u{1932}'..='\u{1932}', - '\u{1939}'..='\u{193b}', '\u{1a17}'..='\u{1a18}', '\u{1a1b}'..='\u{1a1b}', - '\u{1a56}'..='\u{1a56}', '\u{1a58}'..='\u{1a5e}', '\u{1a60}'..='\u{1a60}', - '\u{1a62}'..='\u{1a62}', '\u{1a65}'..='\u{1a6c}', '\u{1a73}'..='\u{1a7c}', - '\u{1a7f}'..='\u{1a7f}', '\u{1aa7}'..='\u{1aa7}', '\u{1ab0}'..='\u{1add}', - '\u{1ae0}'..='\u{1aeb}', '\u{1b00}'..='\u{1b03}', '\u{1b34}'..='\u{1b34}', - '\u{1b36}'..='\u{1b3a}', '\u{1b3c}'..='\u{1b3c}', '\u{1b42}'..='\u{1b42}', - '\u{1b6b}'..='\u{1b73}', '\u{1b80}'..='\u{1b81}', '\u{1ba2}'..='\u{1ba5}', - '\u{1ba8}'..='\u{1ba9}', '\u{1bab}'..='\u{1bad}', '\u{1be6}'..='\u{1be6}', - '\u{1be8}'..='\u{1be9}', '\u{1bed}'..='\u{1bed}', '\u{1bef}'..='\u{1bf1}', - '\u{1c2c}'..='\u{1c33}', '\u{1c36}'..='\u{1c37}', '\u{1c78}'..='\u{1c7d}', - '\u{1cd0}'..='\u{1cd2}', '\u{1cd4}'..='\u{1ce0}', '\u{1ce2}'..='\u{1ce8}', - '\u{1ced}'..='\u{1ced}', '\u{1cf4}'..='\u{1cf4}', '\u{1cf8}'..='\u{1cf9}', - '\u{1d2c}'..='\u{1d6a}', '\u{1d78}'..='\u{1d78}', '\u{1d9b}'..='\u{1dff}', - '\u{1fbd}'..='\u{1fbd}', '\u{1fbf}'..='\u{1fc1}', '\u{1fcd}'..='\u{1fcf}', - '\u{1fdd}'..='\u{1fdf}', '\u{1fed}'..='\u{1fef}', '\u{1ffd}'..='\u{1ffe}', - '\u{200b}'..='\u{200f}', '\u{2018}'..='\u{2019}', '\u{2024}'..='\u{2024}', - '\u{2027}'..='\u{2027}', 
'\u{202a}'..='\u{202e}', '\u{2060}'..='\u{2064}', - '\u{2066}'..='\u{206f}', '\u{2071}'..='\u{2071}', '\u{207f}'..='\u{207f}', - '\u{2090}'..='\u{209c}', '\u{20d0}'..='\u{20f0}', '\u{2c7c}'..='\u{2c7d}', - '\u{2cef}'..='\u{2cf1}', '\u{2d6f}'..='\u{2d6f}', '\u{2d7f}'..='\u{2d7f}', - '\u{2de0}'..='\u{2dff}', '\u{2e2f}'..='\u{2e2f}', '\u{3005}'..='\u{3005}', - '\u{302a}'..='\u{302d}', '\u{3031}'..='\u{3035}', '\u{303b}'..='\u{303b}', - '\u{3099}'..='\u{309e}', '\u{30fc}'..='\u{30fe}', '\u{a015}'..='\u{a015}', - '\u{a4f8}'..='\u{a4fd}', '\u{a60c}'..='\u{a60c}', '\u{a66f}'..='\u{a672}', - '\u{a674}'..='\u{a67d}', '\u{a67f}'..='\u{a67f}', '\u{a69c}'..='\u{a69f}', - '\u{a6f0}'..='\u{a6f1}', '\u{a700}'..='\u{a721}', '\u{a770}'..='\u{a770}', - '\u{a788}'..='\u{a78a}', '\u{a7f1}'..='\u{a7f4}', '\u{a7f8}'..='\u{a7f9}', - '\u{a802}'..='\u{a802}', '\u{a806}'..='\u{a806}', '\u{a80b}'..='\u{a80b}', - '\u{a825}'..='\u{a826}', '\u{a82c}'..='\u{a82c}', '\u{a8c4}'..='\u{a8c5}', - '\u{a8e0}'..='\u{a8f1}', '\u{a8ff}'..='\u{a8ff}', '\u{a926}'..='\u{a92d}', - '\u{a947}'..='\u{a951}', '\u{a980}'..='\u{a982}', '\u{a9b3}'..='\u{a9b3}', - '\u{a9b6}'..='\u{a9b9}', '\u{a9bc}'..='\u{a9bd}', '\u{a9cf}'..='\u{a9cf}', - '\u{a9e5}'..='\u{a9e6}', '\u{aa29}'..='\u{aa2e}', '\u{aa31}'..='\u{aa32}', - '\u{aa35}'..='\u{aa36}', '\u{aa43}'..='\u{aa43}', '\u{aa4c}'..='\u{aa4c}', - '\u{aa70}'..='\u{aa70}', '\u{aa7c}'..='\u{aa7c}', '\u{aab0}'..='\u{aab0}', - '\u{aab2}'..='\u{aab4}', '\u{aab7}'..='\u{aab8}', '\u{aabe}'..='\u{aabf}', - '\u{aac1}'..='\u{aac1}', '\u{aadd}'..='\u{aadd}', '\u{aaec}'..='\u{aaed}', - '\u{aaf3}'..='\u{aaf4}', '\u{aaf6}'..='\u{aaf6}', '\u{ab5b}'..='\u{ab5f}', - '\u{ab69}'..='\u{ab6b}', '\u{abe5}'..='\u{abe5}', '\u{abe8}'..='\u{abe8}', - '\u{abed}'..='\u{abed}', '\u{fb1e}'..='\u{fb1e}', '\u{fbb2}'..='\u{fbc2}', - '\u{fe00}'..='\u{fe0f}', '\u{fe13}'..='\u{fe13}', '\u{fe20}'..='\u{fe2f}', - '\u{fe52}'..='\u{fe52}', '\u{fe55}'..='\u{fe55}', '\u{feff}'..='\u{feff}', - '\u{ff07}'..='\u{ff07}', '\u{ff0e}'..='\u{ff0e}', '\u{ff1a}'..='\u{ff1a}', - '\u{ff3e}'..='\u{ff3e}', '\u{ff40}'..='\u{ff40}', '\u{ff70}'..='\u{ff70}', - '\u{ff9e}'..='\u{ff9f}', '\u{ffe3}'..='\u{ffe3}', '\u{fff9}'..='\u{fffb}', - '\u{101fd}'..='\u{101fd}', '\u{102e0}'..='\u{102e0}', '\u{10376}'..='\u{1037a}', - '\u{10780}'..='\u{10785}', '\u{10787}'..='\u{107b0}', '\u{107b2}'..='\u{107ba}', - '\u{10a01}'..='\u{10a03}', '\u{10a05}'..='\u{10a06}', '\u{10a0c}'..='\u{10a0f}', - '\u{10a38}'..='\u{10a3a}', '\u{10a3f}'..='\u{10a3f}', '\u{10ae5}'..='\u{10ae6}', - '\u{10d24}'..='\u{10d27}', '\u{10d4e}'..='\u{10d4e}', '\u{10d69}'..='\u{10d6d}', - '\u{10d6f}'..='\u{10d6f}', '\u{10eab}'..='\u{10eac}', '\u{10ec5}'..='\u{10ec5}', - '\u{10efa}'..='\u{10eff}', '\u{10f46}'..='\u{10f50}', '\u{10f82}'..='\u{10f85}', - '\u{11001}'..='\u{11001}', '\u{11038}'..='\u{11046}', '\u{11070}'..='\u{11070}', - '\u{11073}'..='\u{11074}', '\u{1107f}'..='\u{11081}', '\u{110b3}'..='\u{110b6}', - '\u{110b9}'..='\u{110ba}', '\u{110bd}'..='\u{110bd}', '\u{110c2}'..='\u{110c2}', - '\u{110cd}'..='\u{110cd}', '\u{11100}'..='\u{11102}', '\u{11127}'..='\u{1112b}', - '\u{1112d}'..='\u{11134}', '\u{11173}'..='\u{11173}', '\u{11180}'..='\u{11181}', - '\u{111b6}'..='\u{111be}', '\u{111c9}'..='\u{111cc}', '\u{111cf}'..='\u{111cf}', - '\u{1122f}'..='\u{11231}', '\u{11234}'..='\u{11234}', '\u{11236}'..='\u{11237}', - '\u{1123e}'..='\u{1123e}', '\u{11241}'..='\u{11241}', '\u{112df}'..='\u{112df}', - '\u{112e3}'..='\u{112ea}', '\u{11300}'..='\u{11301}', '\u{1133b}'..='\u{1133c}', - 
'\u{11340}'..='\u{11340}', '\u{11366}'..='\u{1136c}', '\u{11370}'..='\u{11374}', - '\u{113bb}'..='\u{113c0}', '\u{113ce}'..='\u{113ce}', '\u{113d0}'..='\u{113d0}', - '\u{113d2}'..='\u{113d2}', '\u{113e1}'..='\u{113e2}', '\u{11438}'..='\u{1143f}', - '\u{11442}'..='\u{11444}', '\u{11446}'..='\u{11446}', '\u{1145e}'..='\u{1145e}', - '\u{114b3}'..='\u{114b8}', '\u{114ba}'..='\u{114ba}', '\u{114bf}'..='\u{114c0}', - '\u{114c2}'..='\u{114c3}', '\u{115b2}'..='\u{115b5}', '\u{115bc}'..='\u{115bd}', - '\u{115bf}'..='\u{115c0}', '\u{115dc}'..='\u{115dd}', '\u{11633}'..='\u{1163a}', - '\u{1163d}'..='\u{1163d}', '\u{1163f}'..='\u{11640}', '\u{116ab}'..='\u{116ab}', - '\u{116ad}'..='\u{116ad}', '\u{116b0}'..='\u{116b5}', '\u{116b7}'..='\u{116b7}', - '\u{1171d}'..='\u{1171d}', '\u{1171f}'..='\u{1171f}', '\u{11722}'..='\u{11725}', - '\u{11727}'..='\u{1172b}', '\u{1182f}'..='\u{11837}', '\u{11839}'..='\u{1183a}', - '\u{1193b}'..='\u{1193c}', '\u{1193e}'..='\u{1193e}', '\u{11943}'..='\u{11943}', - '\u{119d4}'..='\u{119d7}', '\u{119da}'..='\u{119db}', '\u{119e0}'..='\u{119e0}', - '\u{11a01}'..='\u{11a0a}', '\u{11a33}'..='\u{11a38}', '\u{11a3b}'..='\u{11a3e}', - '\u{11a47}'..='\u{11a47}', '\u{11a51}'..='\u{11a56}', '\u{11a59}'..='\u{11a5b}', - '\u{11a8a}'..='\u{11a96}', '\u{11a98}'..='\u{11a99}', '\u{11b60}'..='\u{11b60}', - '\u{11b62}'..='\u{11b64}', '\u{11b66}'..='\u{11b66}', '\u{11c30}'..='\u{11c36}', - '\u{11c38}'..='\u{11c3d}', '\u{11c3f}'..='\u{11c3f}', '\u{11c92}'..='\u{11ca7}', - '\u{11caa}'..='\u{11cb0}', '\u{11cb2}'..='\u{11cb3}', '\u{11cb5}'..='\u{11cb6}', - '\u{11d31}'..='\u{11d36}', '\u{11d3a}'..='\u{11d3a}', '\u{11d3c}'..='\u{11d3d}', - '\u{11d3f}'..='\u{11d45}', '\u{11d47}'..='\u{11d47}', '\u{11d90}'..='\u{11d91}', - '\u{11d95}'..='\u{11d95}', '\u{11d97}'..='\u{11d97}', '\u{11dd9}'..='\u{11dd9}', - '\u{11ef3}'..='\u{11ef4}', '\u{11f00}'..='\u{11f01}', '\u{11f36}'..='\u{11f3a}', - '\u{11f40}'..='\u{11f40}', '\u{11f42}'..='\u{11f42}', '\u{11f5a}'..='\u{11f5a}', - '\u{13430}'..='\u{13440}', '\u{13447}'..='\u{13455}', '\u{1611e}'..='\u{16129}', - '\u{1612d}'..='\u{1612f}', '\u{16af0}'..='\u{16af4}', '\u{16b30}'..='\u{16b36}', - '\u{16b40}'..='\u{16b43}', '\u{16d40}'..='\u{16d42}', '\u{16d6b}'..='\u{16d6c}', - '\u{16f4f}'..='\u{16f4f}', '\u{16f8f}'..='\u{16f9f}', '\u{16fe0}'..='\u{16fe1}', - '\u{16fe3}'..='\u{16fe4}', '\u{16ff2}'..='\u{16ff3}', '\u{1aff0}'..='\u{1aff3}', - '\u{1aff5}'..='\u{1affb}', '\u{1affd}'..='\u{1affe}', '\u{1bc9d}'..='\u{1bc9e}', - '\u{1bca0}'..='\u{1bca3}', '\u{1cf00}'..='\u{1cf2d}', '\u{1cf30}'..='\u{1cf46}', - '\u{1d167}'..='\u{1d169}', '\u{1d173}'..='\u{1d182}', '\u{1d185}'..='\u{1d18b}', - '\u{1d1aa}'..='\u{1d1ad}', '\u{1d242}'..='\u{1d244}', '\u{1da00}'..='\u{1da36}', - '\u{1da3b}'..='\u{1da6c}', '\u{1da75}'..='\u{1da75}', '\u{1da84}'..='\u{1da84}', - '\u{1da9b}'..='\u{1da9f}', '\u{1daa1}'..='\u{1daaf}', '\u{1e000}'..='\u{1e006}', - '\u{1e008}'..='\u{1e018}', '\u{1e01b}'..='\u{1e021}', '\u{1e023}'..='\u{1e024}', - '\u{1e026}'..='\u{1e02a}', '\u{1e030}'..='\u{1e06d}', '\u{1e08f}'..='\u{1e08f}', - '\u{1e130}'..='\u{1e13d}', '\u{1e2ae}'..='\u{1e2ae}', '\u{1e2ec}'..='\u{1e2ef}', - '\u{1e4eb}'..='\u{1e4ef}', '\u{1e5ee}'..='\u{1e5ef}', '\u{1e6e3}'..='\u{1e6e3}', - '\u{1e6e6}'..='\u{1e6e6}', '\u{1e6ee}'..='\u{1e6ef}', '\u{1e6f5}'..='\u{1e6f5}', - '\u{1e6ff}'..='\u{1e6ff}', '\u{1e8d0}'..='\u{1e8d6}', '\u{1e944}'..='\u{1e94b}', - '\u{1f3fb}'..='\u{1f3ff}', '\u{e0001}'..='\u{e0001}', '\u{e0020}'..='\u{e007f}', - '\u{e0100}'..='\u{e01ef}', -]; - -#[rustfmt::skip] -pub(super) 
static CASED: &[RangeInclusive<char>; 156] = &[
-    '\u{aa}'..='\u{aa}', '\u{b5}'..='\u{b5}', '\u{ba}'..='\u{ba}', '\u{c0}'..='\u{d6}',
-    '\u{d8}'..='\u{f6}', '\u{f8}'..='\u{1ba}', '\u{1bc}'..='\u{1bf}', '\u{1c4}'..='\u{293}',
-    '\u{296}'..='\u{2b8}', '\u{2c0}'..='\u{2c1}', '\u{2e0}'..='\u{2e4}', '\u{345}'..='\u{345}',
-    '\u{370}'..='\u{373}', '\u{376}'..='\u{377}', '\u{37a}'..='\u{37d}', '\u{37f}'..='\u{37f}',
-    '\u{386}'..='\u{386}', '\u{388}'..='\u{38a}', '\u{38c}'..='\u{38c}', '\u{38e}'..='\u{3a1}',
-    '\u{3a3}'..='\u{3f5}', '\u{3f7}'..='\u{481}', '\u{48a}'..='\u{52f}', '\u{531}'..='\u{556}',
-    '\u{560}'..='\u{588}', '\u{10a0}'..='\u{10c5}', '\u{10c7}'..='\u{10c7}',
-    '\u{10cd}'..='\u{10cd}', '\u{10d0}'..='\u{10fa}', '\u{10fc}'..='\u{10ff}',
-    '\u{13a0}'..='\u{13f5}', '\u{13f8}'..='\u{13fd}', '\u{1c80}'..='\u{1c8a}',
-    '\u{1c90}'..='\u{1cba}', '\u{1cbd}'..='\u{1cbf}', '\u{1d00}'..='\u{1dbf}',
-    '\u{1e00}'..='\u{1f15}', '\u{1f18}'..='\u{1f1d}', '\u{1f20}'..='\u{1f45}',
-    '\u{1f48}'..='\u{1f4d}', '\u{1f50}'..='\u{1f57}', '\u{1f59}'..='\u{1f59}',
-    '\u{1f5b}'..='\u{1f5b}', '\u{1f5d}'..='\u{1f5d}', '\u{1f5f}'..='\u{1f7d}',
-    '\u{1f80}'..='\u{1fb4}', '\u{1fb6}'..='\u{1fbc}', '\u{1fbe}'..='\u{1fbe}',
-    '\u{1fc2}'..='\u{1fc4}', '\u{1fc6}'..='\u{1fcc}', '\u{1fd0}'..='\u{1fd3}',
-    '\u{1fd6}'..='\u{1fdb}', '\u{1fe0}'..='\u{1fec}', '\u{1ff2}'..='\u{1ff4}',
-    '\u{1ff6}'..='\u{1ffc}', '\u{2071}'..='\u{2071}', '\u{207f}'..='\u{207f}',
-    '\u{2090}'..='\u{209c}', '\u{2102}'..='\u{2102}', '\u{2107}'..='\u{2107}',
-    '\u{210a}'..='\u{2113}', '\u{2115}'..='\u{2115}', '\u{2119}'..='\u{211d}',
-    '\u{2124}'..='\u{2124}', '\u{2126}'..='\u{2126}', '\u{2128}'..='\u{2128}',
-    '\u{212a}'..='\u{212d}', '\u{212f}'..='\u{2134}', '\u{2139}'..='\u{2139}',
-    '\u{213c}'..='\u{213f}', '\u{2145}'..='\u{2149}', '\u{214e}'..='\u{214e}',
-    '\u{2160}'..='\u{217f}', '\u{2183}'..='\u{2184}', '\u{24b6}'..='\u{24e9}',
-    '\u{2c00}'..='\u{2ce4}', '\u{2ceb}'..='\u{2cee}', '\u{2cf2}'..='\u{2cf3}',
-    '\u{2d00}'..='\u{2d25}', '\u{2d27}'..='\u{2d27}', '\u{2d2d}'..='\u{2d2d}',
-    '\u{a640}'..='\u{a66d}', '\u{a680}'..='\u{a69d}', '\u{a722}'..='\u{a787}',
-    '\u{a78b}'..='\u{a78e}', '\u{a790}'..='\u{a7dc}', '\u{a7f1}'..='\u{a7f6}',
-    '\u{a7f8}'..='\u{a7fa}', '\u{ab30}'..='\u{ab5a}', '\u{ab5c}'..='\u{ab69}',
-    '\u{ab70}'..='\u{abbf}', '\u{fb00}'..='\u{fb06}', '\u{fb13}'..='\u{fb17}',
-    '\u{ff21}'..='\u{ff3a}', '\u{ff41}'..='\u{ff5a}', '\u{10400}'..='\u{1044f}',
-    '\u{104b0}'..='\u{104d3}', '\u{104d8}'..='\u{104fb}', '\u{10570}'..='\u{1057a}',
-    '\u{1057c}'..='\u{1058a}', '\u{1058c}'..='\u{10592}', '\u{10594}'..='\u{10595}',
-    '\u{10597}'..='\u{105a1}', '\u{105a3}'..='\u{105b1}', '\u{105b3}'..='\u{105b9}',
-    '\u{105bb}'..='\u{105bc}', '\u{10780}'..='\u{10780}', '\u{10783}'..='\u{10785}',
-    '\u{10787}'..='\u{107b0}', '\u{107b2}'..='\u{107ba}', '\u{10c80}'..='\u{10cb2}',
-    '\u{10cc0}'..='\u{10cf2}', '\u{10d50}'..='\u{10d65}', '\u{10d70}'..='\u{10d85}',
-    '\u{118a0}'..='\u{118df}', '\u{16e40}'..='\u{16e7f}', '\u{16ea0}'..='\u{16eb8}',
-    '\u{16ebb}'..='\u{16ed3}', '\u{1d400}'..='\u{1d454}', '\u{1d456}'..='\u{1d49c}',
-    '\u{1d49e}'..='\u{1d49f}', '\u{1d4a2}'..='\u{1d4a2}', '\u{1d4a5}'..='\u{1d4a6}',
-    '\u{1d4a9}'..='\u{1d4ac}', '\u{1d4ae}'..='\u{1d4b9}', '\u{1d4bb}'..='\u{1d4bb}',
-    '\u{1d4bd}'..='\u{1d4c3}', '\u{1d4c5}'..='\u{1d505}', '\u{1d507}'..='\u{1d50a}',
-    '\u{1d50d}'..='\u{1d514}', '\u{1d516}'..='\u{1d51c}', '\u{1d51e}'..='\u{1d539}',
-    '\u{1d53b}'..='\u{1d53e}', '\u{1d540}'..='\u{1d544}', '\u{1d546}'..='\u{1d546}',
-    '\u{1d54a}'..='\u{1d550}',
'\u{1d552}'..='\u{1d6a5}', '\u{1d6a8}'..='\u{1d6c0}',
-    '\u{1d6c2}'..='\u{1d6da}', '\u{1d6dc}'..='\u{1d6fa}', '\u{1d6fc}'..='\u{1d714}',
-    '\u{1d716}'..='\u{1d734}', '\u{1d736}'..='\u{1d74e}', '\u{1d750}'..='\u{1d76e}',
-    '\u{1d770}'..='\u{1d788}', '\u{1d78a}'..='\u{1d7a8}', '\u{1d7aa}'..='\u{1d7c2}',
-    '\u{1d7c4}'..='\u{1d7cb}', '\u{1df00}'..='\u{1df09}', '\u{1df0b}'..='\u{1df1e}',
-    '\u{1df25}'..='\u{1df2a}', '\u{1e030}'..='\u{1e06d}', '\u{1e900}'..='\u{1e943}',
-    '\u{1f130}'..='\u{1f149}', '\u{1f150}'..='\u{1f169}', '\u{1f170}'..='\u{1f189}',
-];
-
-#[rustfmt::skip]
-pub(super) static GRAPHEME_EXTEND: &[RangeInclusive<char>; 383] = &[
-    '\u{300}'..='\u{36f}', '\u{483}'..='\u{489}', '\u{591}'..='\u{5bd}', '\u{5bf}'..='\u{5bf}',
-    '\u{5c1}'..='\u{5c2}', '\u{5c4}'..='\u{5c5}', '\u{5c7}'..='\u{5c7}', '\u{610}'..='\u{61a}',
-    '\u{64b}'..='\u{65f}', '\u{670}'..='\u{670}', '\u{6d6}'..='\u{6dc}', '\u{6df}'..='\u{6e4}',
-    '\u{6e7}'..='\u{6e8}', '\u{6ea}'..='\u{6ed}', '\u{711}'..='\u{711}', '\u{730}'..='\u{74a}',
-    '\u{7a6}'..='\u{7b0}', '\u{7eb}'..='\u{7f3}', '\u{7fd}'..='\u{7fd}', '\u{816}'..='\u{819}',
-    '\u{81b}'..='\u{823}', '\u{825}'..='\u{827}', '\u{829}'..='\u{82d}', '\u{859}'..='\u{85b}',
-    '\u{897}'..='\u{89f}', '\u{8ca}'..='\u{8e1}', '\u{8e3}'..='\u{902}', '\u{93a}'..='\u{93a}',
-    '\u{93c}'..='\u{93c}', '\u{941}'..='\u{948}', '\u{94d}'..='\u{94d}', '\u{951}'..='\u{957}',
-    '\u{962}'..='\u{963}', '\u{981}'..='\u{981}', '\u{9bc}'..='\u{9bc}', '\u{9be}'..='\u{9be}',
-    '\u{9c1}'..='\u{9c4}', '\u{9cd}'..='\u{9cd}', '\u{9d7}'..='\u{9d7}', '\u{9e2}'..='\u{9e3}',
-    '\u{9fe}'..='\u{9fe}', '\u{a01}'..='\u{a02}', '\u{a3c}'..='\u{a3c}', '\u{a41}'..='\u{a42}',
-    '\u{a47}'..='\u{a48}', '\u{a4b}'..='\u{a4d}', '\u{a51}'..='\u{a51}', '\u{a70}'..='\u{a71}',
-    '\u{a75}'..='\u{a75}', '\u{a81}'..='\u{a82}', '\u{abc}'..='\u{abc}', '\u{ac1}'..='\u{ac5}',
-    '\u{ac7}'..='\u{ac8}', '\u{acd}'..='\u{acd}', '\u{ae2}'..='\u{ae3}', '\u{afa}'..='\u{aff}',
-    '\u{b01}'..='\u{b01}', '\u{b3c}'..='\u{b3c}', '\u{b3e}'..='\u{b3f}', '\u{b41}'..='\u{b44}',
-    '\u{b4d}'..='\u{b4d}', '\u{b55}'..='\u{b57}', '\u{b62}'..='\u{b63}', '\u{b82}'..='\u{b82}',
-    '\u{bbe}'..='\u{bbe}', '\u{bc0}'..='\u{bc0}', '\u{bcd}'..='\u{bcd}', '\u{bd7}'..='\u{bd7}',
-    '\u{c00}'..='\u{c00}', '\u{c04}'..='\u{c04}', '\u{c3c}'..='\u{c3c}', '\u{c3e}'..='\u{c40}',
-    '\u{c46}'..='\u{c48}', '\u{c4a}'..='\u{c4d}', '\u{c55}'..='\u{c56}', '\u{c62}'..='\u{c63}',
-    '\u{c81}'..='\u{c81}', '\u{cbc}'..='\u{cbc}', '\u{cbf}'..='\u{cc0}', '\u{cc2}'..='\u{cc2}',
-    '\u{cc6}'..='\u{cc8}', '\u{cca}'..='\u{ccd}', '\u{cd5}'..='\u{cd6}', '\u{ce2}'..='\u{ce3}',
-    '\u{d00}'..='\u{d01}', '\u{d3b}'..='\u{d3c}', '\u{d3e}'..='\u{d3e}', '\u{d41}'..='\u{d44}',
-    '\u{d4d}'..='\u{d4d}', '\u{d57}'..='\u{d57}', '\u{d62}'..='\u{d63}', '\u{d81}'..='\u{d81}',
-    '\u{dca}'..='\u{dca}', '\u{dcf}'..='\u{dcf}', '\u{dd2}'..='\u{dd4}', '\u{dd6}'..='\u{dd6}',
-    '\u{ddf}'..='\u{ddf}', '\u{e31}'..='\u{e31}', '\u{e34}'..='\u{e3a}', '\u{e47}'..='\u{e4e}',
-    '\u{eb1}'..='\u{eb1}', '\u{eb4}'..='\u{ebc}', '\u{ec8}'..='\u{ece}', '\u{f18}'..='\u{f19}',
-    '\u{f35}'..='\u{f35}', '\u{f37}'..='\u{f37}', '\u{f39}'..='\u{f39}', '\u{f71}'..='\u{f7e}',
-    '\u{f80}'..='\u{f84}', '\u{f86}'..='\u{f87}', '\u{f8d}'..='\u{f97}', '\u{f99}'..='\u{fbc}',
-    '\u{fc6}'..='\u{fc6}', '\u{102d}'..='\u{1030}', '\u{1032}'..='\u{1037}',
-    '\u{1039}'..='\u{103a}', '\u{103d}'..='\u{103e}', '\u{1058}'..='\u{1059}',
-    '\u{105e}'..='\u{1060}', '\u{1071}'..='\u{1074}', '\u{1082}'..='\u{1082}',
-    '\u{1085}'..='\u{1086}', '\u{108d}'..='\u{108d}',
'\u{109d}'..='\u{109d}', - '\u{135d}'..='\u{135f}', '\u{1712}'..='\u{1715}', '\u{1732}'..='\u{1734}', - '\u{1752}'..='\u{1753}', '\u{1772}'..='\u{1773}', '\u{17b4}'..='\u{17b5}', - '\u{17b7}'..='\u{17bd}', '\u{17c6}'..='\u{17c6}', '\u{17c9}'..='\u{17d3}', - '\u{17dd}'..='\u{17dd}', '\u{180b}'..='\u{180d}', '\u{180f}'..='\u{180f}', - '\u{1885}'..='\u{1886}', '\u{18a9}'..='\u{18a9}', '\u{1920}'..='\u{1922}', - '\u{1927}'..='\u{1928}', '\u{1932}'..='\u{1932}', '\u{1939}'..='\u{193b}', - '\u{1a17}'..='\u{1a18}', '\u{1a1b}'..='\u{1a1b}', '\u{1a56}'..='\u{1a56}', - '\u{1a58}'..='\u{1a5e}', '\u{1a60}'..='\u{1a60}', '\u{1a62}'..='\u{1a62}', - '\u{1a65}'..='\u{1a6c}', '\u{1a73}'..='\u{1a7c}', '\u{1a7f}'..='\u{1a7f}', - '\u{1ab0}'..='\u{1add}', '\u{1ae0}'..='\u{1aeb}', '\u{1b00}'..='\u{1b03}', - '\u{1b34}'..='\u{1b3d}', '\u{1b42}'..='\u{1b44}', '\u{1b6b}'..='\u{1b73}', - '\u{1b80}'..='\u{1b81}', '\u{1ba2}'..='\u{1ba5}', '\u{1ba8}'..='\u{1bad}', - '\u{1be6}'..='\u{1be6}', '\u{1be8}'..='\u{1be9}', '\u{1bed}'..='\u{1bed}', - '\u{1bef}'..='\u{1bf3}', '\u{1c2c}'..='\u{1c33}', '\u{1c36}'..='\u{1c37}', - '\u{1cd0}'..='\u{1cd2}', '\u{1cd4}'..='\u{1ce0}', '\u{1ce2}'..='\u{1ce8}', - '\u{1ced}'..='\u{1ced}', '\u{1cf4}'..='\u{1cf4}', '\u{1cf8}'..='\u{1cf9}', - '\u{1dc0}'..='\u{1dff}', '\u{200c}'..='\u{200c}', '\u{20d0}'..='\u{20f0}', - '\u{2cef}'..='\u{2cf1}', '\u{2d7f}'..='\u{2d7f}', '\u{2de0}'..='\u{2dff}', - '\u{302a}'..='\u{302f}', '\u{3099}'..='\u{309a}', '\u{a66f}'..='\u{a672}', - '\u{a674}'..='\u{a67d}', '\u{a69e}'..='\u{a69f}', '\u{a6f0}'..='\u{a6f1}', - '\u{a802}'..='\u{a802}', '\u{a806}'..='\u{a806}', '\u{a80b}'..='\u{a80b}', - '\u{a825}'..='\u{a826}', '\u{a82c}'..='\u{a82c}', '\u{a8c4}'..='\u{a8c5}', - '\u{a8e0}'..='\u{a8f1}', '\u{a8ff}'..='\u{a8ff}', '\u{a926}'..='\u{a92d}', - '\u{a947}'..='\u{a951}', '\u{a953}'..='\u{a953}', '\u{a980}'..='\u{a982}', - '\u{a9b3}'..='\u{a9b3}', '\u{a9b6}'..='\u{a9b9}', '\u{a9bc}'..='\u{a9bd}', - '\u{a9c0}'..='\u{a9c0}', '\u{a9e5}'..='\u{a9e5}', '\u{aa29}'..='\u{aa2e}', - '\u{aa31}'..='\u{aa32}', '\u{aa35}'..='\u{aa36}', '\u{aa43}'..='\u{aa43}', - '\u{aa4c}'..='\u{aa4c}', '\u{aa7c}'..='\u{aa7c}', '\u{aab0}'..='\u{aab0}', - '\u{aab2}'..='\u{aab4}', '\u{aab7}'..='\u{aab8}', '\u{aabe}'..='\u{aabf}', - '\u{aac1}'..='\u{aac1}', '\u{aaec}'..='\u{aaed}', '\u{aaf6}'..='\u{aaf6}', - '\u{abe5}'..='\u{abe5}', '\u{abe8}'..='\u{abe8}', '\u{abed}'..='\u{abed}', - '\u{fb1e}'..='\u{fb1e}', '\u{fe00}'..='\u{fe0f}', '\u{fe20}'..='\u{fe2f}', - '\u{ff9e}'..='\u{ff9f}', '\u{101fd}'..='\u{101fd}', '\u{102e0}'..='\u{102e0}', - '\u{10376}'..='\u{1037a}', '\u{10a01}'..='\u{10a03}', '\u{10a05}'..='\u{10a06}', - '\u{10a0c}'..='\u{10a0f}', '\u{10a38}'..='\u{10a3a}', '\u{10a3f}'..='\u{10a3f}', - '\u{10ae5}'..='\u{10ae6}', '\u{10d24}'..='\u{10d27}', '\u{10d69}'..='\u{10d6d}', - '\u{10eab}'..='\u{10eac}', '\u{10efa}'..='\u{10eff}', '\u{10f46}'..='\u{10f50}', - '\u{10f82}'..='\u{10f85}', '\u{11001}'..='\u{11001}', '\u{11038}'..='\u{11046}', - '\u{11070}'..='\u{11070}', '\u{11073}'..='\u{11074}', '\u{1107f}'..='\u{11081}', - '\u{110b3}'..='\u{110b6}', '\u{110b9}'..='\u{110ba}', '\u{110c2}'..='\u{110c2}', - '\u{11100}'..='\u{11102}', '\u{11127}'..='\u{1112b}', '\u{1112d}'..='\u{11134}', - '\u{11173}'..='\u{11173}', '\u{11180}'..='\u{11181}', '\u{111b6}'..='\u{111be}', - '\u{111c0}'..='\u{111c0}', '\u{111c9}'..='\u{111cc}', '\u{111cf}'..='\u{111cf}', - '\u{1122f}'..='\u{11231}', '\u{11234}'..='\u{11237}', '\u{1123e}'..='\u{1123e}', - '\u{11241}'..='\u{11241}', '\u{112df}'..='\u{112df}', 
'\u{112e3}'..='\u{112ea}',
-    '\u{11300}'..='\u{11301}', '\u{1133b}'..='\u{1133c}', '\u{1133e}'..='\u{1133e}',
-    '\u{11340}'..='\u{11340}', '\u{1134d}'..='\u{1134d}', '\u{11357}'..='\u{11357}',
-    '\u{11366}'..='\u{1136c}', '\u{11370}'..='\u{11374}', '\u{113b8}'..='\u{113b8}',
-    '\u{113bb}'..='\u{113c0}', '\u{113c2}'..='\u{113c2}', '\u{113c5}'..='\u{113c5}',
-    '\u{113c7}'..='\u{113c9}', '\u{113ce}'..='\u{113d0}', '\u{113d2}'..='\u{113d2}',
-    '\u{113e1}'..='\u{113e2}', '\u{11438}'..='\u{1143f}', '\u{11442}'..='\u{11444}',
-    '\u{11446}'..='\u{11446}', '\u{1145e}'..='\u{1145e}', '\u{114b0}'..='\u{114b0}',
-    '\u{114b3}'..='\u{114b8}', '\u{114ba}'..='\u{114ba}', '\u{114bd}'..='\u{114bd}',
-    '\u{114bf}'..='\u{114c0}', '\u{114c2}'..='\u{114c3}', '\u{115af}'..='\u{115af}',
-    '\u{115b2}'..='\u{115b5}', '\u{115bc}'..='\u{115bd}', '\u{115bf}'..='\u{115c0}',
-    '\u{115dc}'..='\u{115dd}', '\u{11633}'..='\u{1163a}', '\u{1163d}'..='\u{1163d}',
-    '\u{1163f}'..='\u{11640}', '\u{116ab}'..='\u{116ab}', '\u{116ad}'..='\u{116ad}',
-    '\u{116b0}'..='\u{116b7}', '\u{1171d}'..='\u{1171d}', '\u{1171f}'..='\u{1171f}',
-    '\u{11722}'..='\u{11725}', '\u{11727}'..='\u{1172b}', '\u{1182f}'..='\u{11837}',
-    '\u{11839}'..='\u{1183a}', '\u{11930}'..='\u{11930}', '\u{1193b}'..='\u{1193e}',
-    '\u{11943}'..='\u{11943}', '\u{119d4}'..='\u{119d7}', '\u{119da}'..='\u{119db}',
-    '\u{119e0}'..='\u{119e0}', '\u{11a01}'..='\u{11a0a}', '\u{11a33}'..='\u{11a38}',
-    '\u{11a3b}'..='\u{11a3e}', '\u{11a47}'..='\u{11a47}', '\u{11a51}'..='\u{11a56}',
-    '\u{11a59}'..='\u{11a5b}', '\u{11a8a}'..='\u{11a96}', '\u{11a98}'..='\u{11a99}',
-    '\u{11b60}'..='\u{11b60}', '\u{11b62}'..='\u{11b64}', '\u{11b66}'..='\u{11b66}',
-    '\u{11c30}'..='\u{11c36}', '\u{11c38}'..='\u{11c3d}', '\u{11c3f}'..='\u{11c3f}',
-    '\u{11c92}'..='\u{11ca7}', '\u{11caa}'..='\u{11cb0}', '\u{11cb2}'..='\u{11cb3}',
-    '\u{11cb5}'..='\u{11cb6}', '\u{11d31}'..='\u{11d36}', '\u{11d3a}'..='\u{11d3a}',
-    '\u{11d3c}'..='\u{11d3d}', '\u{11d3f}'..='\u{11d45}', '\u{11d47}'..='\u{11d47}',
-    '\u{11d90}'..='\u{11d91}', '\u{11d95}'..='\u{11d95}', '\u{11d97}'..='\u{11d97}',
-    '\u{11ef3}'..='\u{11ef4}', '\u{11f00}'..='\u{11f01}', '\u{11f36}'..='\u{11f3a}',
-    '\u{11f40}'..='\u{11f42}', '\u{11f5a}'..='\u{11f5a}', '\u{13440}'..='\u{13440}',
-    '\u{13447}'..='\u{13455}', '\u{1611e}'..='\u{16129}', '\u{1612d}'..='\u{1612f}',
-    '\u{16af0}'..='\u{16af4}', '\u{16b30}'..='\u{16b36}', '\u{16f4f}'..='\u{16f4f}',
-    '\u{16f8f}'..='\u{16f92}', '\u{16fe4}'..='\u{16fe4}', '\u{16ff0}'..='\u{16ff1}',
-    '\u{1bc9d}'..='\u{1bc9e}', '\u{1cf00}'..='\u{1cf2d}', '\u{1cf30}'..='\u{1cf46}',
-    '\u{1d165}'..='\u{1d169}', '\u{1d16d}'..='\u{1d172}', '\u{1d17b}'..='\u{1d182}',
-    '\u{1d185}'..='\u{1d18b}', '\u{1d1aa}'..='\u{1d1ad}', '\u{1d242}'..='\u{1d244}',
-    '\u{1da00}'..='\u{1da36}', '\u{1da3b}'..='\u{1da6c}', '\u{1da75}'..='\u{1da75}',
-    '\u{1da84}'..='\u{1da84}', '\u{1da9b}'..='\u{1da9f}', '\u{1daa1}'..='\u{1daaf}',
-    '\u{1e000}'..='\u{1e006}', '\u{1e008}'..='\u{1e018}', '\u{1e01b}'..='\u{1e021}',
-    '\u{1e023}'..='\u{1e024}', '\u{1e026}'..='\u{1e02a}', '\u{1e08f}'..='\u{1e08f}',
-    '\u{1e130}'..='\u{1e136}', '\u{1e2ae}'..='\u{1e2ae}', '\u{1e2ec}'..='\u{1e2ef}',
-    '\u{1e4ec}'..='\u{1e4ef}', '\u{1e5ee}'..='\u{1e5ef}', '\u{1e6e3}'..='\u{1e6e3}',
-    '\u{1e6e6}'..='\u{1e6e6}', '\u{1e6ee}'..='\u{1e6ef}', '\u{1e6f5}'..='\u{1e6f5}',
-    '\u{1e8d0}'..='\u{1e8d6}', '\u{1e944}'..='\u{1e94a}', '\u{e0020}'..='\u{e007f}',
-    '\u{e0100}'..='\u{e01ef}',
-];
-
-#[rustfmt::skip]
-pub(super) static LOWERCASE: &[RangeInclusive<char>; 676] = &[
-
'\u{aa}'..='\u{aa}', '\u{b5}'..='\u{b5}', '\u{ba}'..='\u{ba}', '\u{df}'..='\u{f6}', - '\u{f8}'..='\u{ff}', '\u{101}'..='\u{101}', '\u{103}'..='\u{103}', '\u{105}'..='\u{105}', - '\u{107}'..='\u{107}', '\u{109}'..='\u{109}', '\u{10b}'..='\u{10b}', '\u{10d}'..='\u{10d}', - '\u{10f}'..='\u{10f}', '\u{111}'..='\u{111}', '\u{113}'..='\u{113}', '\u{115}'..='\u{115}', - '\u{117}'..='\u{117}', '\u{119}'..='\u{119}', '\u{11b}'..='\u{11b}', '\u{11d}'..='\u{11d}', - '\u{11f}'..='\u{11f}', '\u{121}'..='\u{121}', '\u{123}'..='\u{123}', '\u{125}'..='\u{125}', - '\u{127}'..='\u{127}', '\u{129}'..='\u{129}', '\u{12b}'..='\u{12b}', '\u{12d}'..='\u{12d}', - '\u{12f}'..='\u{12f}', '\u{131}'..='\u{131}', '\u{133}'..='\u{133}', '\u{135}'..='\u{135}', - '\u{137}'..='\u{138}', '\u{13a}'..='\u{13a}', '\u{13c}'..='\u{13c}', '\u{13e}'..='\u{13e}', - '\u{140}'..='\u{140}', '\u{142}'..='\u{142}', '\u{144}'..='\u{144}', '\u{146}'..='\u{146}', - '\u{148}'..='\u{149}', '\u{14b}'..='\u{14b}', '\u{14d}'..='\u{14d}', '\u{14f}'..='\u{14f}', - '\u{151}'..='\u{151}', '\u{153}'..='\u{153}', '\u{155}'..='\u{155}', '\u{157}'..='\u{157}', - '\u{159}'..='\u{159}', '\u{15b}'..='\u{15b}', '\u{15d}'..='\u{15d}', '\u{15f}'..='\u{15f}', - '\u{161}'..='\u{161}', '\u{163}'..='\u{163}', '\u{165}'..='\u{165}', '\u{167}'..='\u{167}', - '\u{169}'..='\u{169}', '\u{16b}'..='\u{16b}', '\u{16d}'..='\u{16d}', '\u{16f}'..='\u{16f}', - '\u{171}'..='\u{171}', '\u{173}'..='\u{173}', '\u{175}'..='\u{175}', '\u{177}'..='\u{177}', - '\u{17a}'..='\u{17a}', '\u{17c}'..='\u{17c}', '\u{17e}'..='\u{180}', '\u{183}'..='\u{183}', - '\u{185}'..='\u{185}', '\u{188}'..='\u{188}', '\u{18c}'..='\u{18d}', '\u{192}'..='\u{192}', - '\u{195}'..='\u{195}', '\u{199}'..='\u{19b}', '\u{19e}'..='\u{19e}', '\u{1a1}'..='\u{1a1}', - '\u{1a3}'..='\u{1a3}', '\u{1a5}'..='\u{1a5}', '\u{1a8}'..='\u{1a8}', '\u{1aa}'..='\u{1ab}', - '\u{1ad}'..='\u{1ad}', '\u{1b0}'..='\u{1b0}', '\u{1b4}'..='\u{1b4}', '\u{1b6}'..='\u{1b6}', - '\u{1b9}'..='\u{1ba}', '\u{1bd}'..='\u{1bf}', '\u{1c6}'..='\u{1c6}', '\u{1c9}'..='\u{1c9}', - '\u{1cc}'..='\u{1cc}', '\u{1ce}'..='\u{1ce}', '\u{1d0}'..='\u{1d0}', '\u{1d2}'..='\u{1d2}', - '\u{1d4}'..='\u{1d4}', '\u{1d6}'..='\u{1d6}', '\u{1d8}'..='\u{1d8}', '\u{1da}'..='\u{1da}', - '\u{1dc}'..='\u{1dd}', '\u{1df}'..='\u{1df}', '\u{1e1}'..='\u{1e1}', '\u{1e3}'..='\u{1e3}', - '\u{1e5}'..='\u{1e5}', '\u{1e7}'..='\u{1e7}', '\u{1e9}'..='\u{1e9}', '\u{1eb}'..='\u{1eb}', - '\u{1ed}'..='\u{1ed}', '\u{1ef}'..='\u{1f0}', '\u{1f3}'..='\u{1f3}', '\u{1f5}'..='\u{1f5}', - '\u{1f9}'..='\u{1f9}', '\u{1fb}'..='\u{1fb}', '\u{1fd}'..='\u{1fd}', '\u{1ff}'..='\u{1ff}', - '\u{201}'..='\u{201}', '\u{203}'..='\u{203}', '\u{205}'..='\u{205}', '\u{207}'..='\u{207}', - '\u{209}'..='\u{209}', '\u{20b}'..='\u{20b}', '\u{20d}'..='\u{20d}', '\u{20f}'..='\u{20f}', - '\u{211}'..='\u{211}', '\u{213}'..='\u{213}', '\u{215}'..='\u{215}', '\u{217}'..='\u{217}', - '\u{219}'..='\u{219}', '\u{21b}'..='\u{21b}', '\u{21d}'..='\u{21d}', '\u{21f}'..='\u{21f}', - '\u{221}'..='\u{221}', '\u{223}'..='\u{223}', '\u{225}'..='\u{225}', '\u{227}'..='\u{227}', - '\u{229}'..='\u{229}', '\u{22b}'..='\u{22b}', '\u{22d}'..='\u{22d}', '\u{22f}'..='\u{22f}', - '\u{231}'..='\u{231}', '\u{233}'..='\u{239}', '\u{23c}'..='\u{23c}', '\u{23f}'..='\u{240}', - '\u{242}'..='\u{242}', '\u{247}'..='\u{247}', '\u{249}'..='\u{249}', '\u{24b}'..='\u{24b}', - '\u{24d}'..='\u{24d}', '\u{24f}'..='\u{293}', '\u{296}'..='\u{2b8}', '\u{2c0}'..='\u{2c1}', - '\u{2e0}'..='\u{2e4}', '\u{345}'..='\u{345}', '\u{371}'..='\u{371}', 
'\u{373}'..='\u{373}', - '\u{377}'..='\u{377}', '\u{37a}'..='\u{37d}', '\u{390}'..='\u{390}', '\u{3ac}'..='\u{3ce}', - '\u{3d0}'..='\u{3d1}', '\u{3d5}'..='\u{3d7}', '\u{3d9}'..='\u{3d9}', '\u{3db}'..='\u{3db}', - '\u{3dd}'..='\u{3dd}', '\u{3df}'..='\u{3df}', '\u{3e1}'..='\u{3e1}', '\u{3e3}'..='\u{3e3}', - '\u{3e5}'..='\u{3e5}', '\u{3e7}'..='\u{3e7}', '\u{3e9}'..='\u{3e9}', '\u{3eb}'..='\u{3eb}', - '\u{3ed}'..='\u{3ed}', '\u{3ef}'..='\u{3f3}', '\u{3f5}'..='\u{3f5}', '\u{3f8}'..='\u{3f8}', - '\u{3fb}'..='\u{3fc}', '\u{430}'..='\u{45f}', '\u{461}'..='\u{461}', '\u{463}'..='\u{463}', - '\u{465}'..='\u{465}', '\u{467}'..='\u{467}', '\u{469}'..='\u{469}', '\u{46b}'..='\u{46b}', - '\u{46d}'..='\u{46d}', '\u{46f}'..='\u{46f}', '\u{471}'..='\u{471}', '\u{473}'..='\u{473}', - '\u{475}'..='\u{475}', '\u{477}'..='\u{477}', '\u{479}'..='\u{479}', '\u{47b}'..='\u{47b}', - '\u{47d}'..='\u{47d}', '\u{47f}'..='\u{47f}', '\u{481}'..='\u{481}', '\u{48b}'..='\u{48b}', - '\u{48d}'..='\u{48d}', '\u{48f}'..='\u{48f}', '\u{491}'..='\u{491}', '\u{493}'..='\u{493}', - '\u{495}'..='\u{495}', '\u{497}'..='\u{497}', '\u{499}'..='\u{499}', '\u{49b}'..='\u{49b}', - '\u{49d}'..='\u{49d}', '\u{49f}'..='\u{49f}', '\u{4a1}'..='\u{4a1}', '\u{4a3}'..='\u{4a3}', - '\u{4a5}'..='\u{4a5}', '\u{4a7}'..='\u{4a7}', '\u{4a9}'..='\u{4a9}', '\u{4ab}'..='\u{4ab}', - '\u{4ad}'..='\u{4ad}', '\u{4af}'..='\u{4af}', '\u{4b1}'..='\u{4b1}', '\u{4b3}'..='\u{4b3}', - '\u{4b5}'..='\u{4b5}', '\u{4b7}'..='\u{4b7}', '\u{4b9}'..='\u{4b9}', '\u{4bb}'..='\u{4bb}', - '\u{4bd}'..='\u{4bd}', '\u{4bf}'..='\u{4bf}', '\u{4c2}'..='\u{4c2}', '\u{4c4}'..='\u{4c4}', - '\u{4c6}'..='\u{4c6}', '\u{4c8}'..='\u{4c8}', '\u{4ca}'..='\u{4ca}', '\u{4cc}'..='\u{4cc}', - '\u{4ce}'..='\u{4cf}', '\u{4d1}'..='\u{4d1}', '\u{4d3}'..='\u{4d3}', '\u{4d5}'..='\u{4d5}', - '\u{4d7}'..='\u{4d7}', '\u{4d9}'..='\u{4d9}', '\u{4db}'..='\u{4db}', '\u{4dd}'..='\u{4dd}', - '\u{4df}'..='\u{4df}', '\u{4e1}'..='\u{4e1}', '\u{4e3}'..='\u{4e3}', '\u{4e5}'..='\u{4e5}', - '\u{4e7}'..='\u{4e7}', '\u{4e9}'..='\u{4e9}', '\u{4eb}'..='\u{4eb}', '\u{4ed}'..='\u{4ed}', - '\u{4ef}'..='\u{4ef}', '\u{4f1}'..='\u{4f1}', '\u{4f3}'..='\u{4f3}', '\u{4f5}'..='\u{4f5}', - '\u{4f7}'..='\u{4f7}', '\u{4f9}'..='\u{4f9}', '\u{4fb}'..='\u{4fb}', '\u{4fd}'..='\u{4fd}', - '\u{4ff}'..='\u{4ff}', '\u{501}'..='\u{501}', '\u{503}'..='\u{503}', '\u{505}'..='\u{505}', - '\u{507}'..='\u{507}', '\u{509}'..='\u{509}', '\u{50b}'..='\u{50b}', '\u{50d}'..='\u{50d}', - '\u{50f}'..='\u{50f}', '\u{511}'..='\u{511}', '\u{513}'..='\u{513}', '\u{515}'..='\u{515}', - '\u{517}'..='\u{517}', '\u{519}'..='\u{519}', '\u{51b}'..='\u{51b}', '\u{51d}'..='\u{51d}', - '\u{51f}'..='\u{51f}', '\u{521}'..='\u{521}', '\u{523}'..='\u{523}', '\u{525}'..='\u{525}', - '\u{527}'..='\u{527}', '\u{529}'..='\u{529}', '\u{52b}'..='\u{52b}', '\u{52d}'..='\u{52d}', - '\u{52f}'..='\u{52f}', '\u{560}'..='\u{588}', '\u{10d0}'..='\u{10fa}', - '\u{10fc}'..='\u{10ff}', '\u{13f8}'..='\u{13fd}', '\u{1c80}'..='\u{1c88}', - '\u{1c8a}'..='\u{1c8a}', '\u{1d00}'..='\u{1dbf}', '\u{1e01}'..='\u{1e01}', - '\u{1e03}'..='\u{1e03}', '\u{1e05}'..='\u{1e05}', '\u{1e07}'..='\u{1e07}', - '\u{1e09}'..='\u{1e09}', '\u{1e0b}'..='\u{1e0b}', '\u{1e0d}'..='\u{1e0d}', - '\u{1e0f}'..='\u{1e0f}', '\u{1e11}'..='\u{1e11}', '\u{1e13}'..='\u{1e13}', - '\u{1e15}'..='\u{1e15}', '\u{1e17}'..='\u{1e17}', '\u{1e19}'..='\u{1e19}', - '\u{1e1b}'..='\u{1e1b}', '\u{1e1d}'..='\u{1e1d}', '\u{1e1f}'..='\u{1e1f}', - '\u{1e21}'..='\u{1e21}', '\u{1e23}'..='\u{1e23}', '\u{1e25}'..='\u{1e25}', - 
'\u{1e27}'..='\u{1e27}', '\u{1e29}'..='\u{1e29}', '\u{1e2b}'..='\u{1e2b}', - '\u{1e2d}'..='\u{1e2d}', '\u{1e2f}'..='\u{1e2f}', '\u{1e31}'..='\u{1e31}', - '\u{1e33}'..='\u{1e33}', '\u{1e35}'..='\u{1e35}', '\u{1e37}'..='\u{1e37}', - '\u{1e39}'..='\u{1e39}', '\u{1e3b}'..='\u{1e3b}', '\u{1e3d}'..='\u{1e3d}', - '\u{1e3f}'..='\u{1e3f}', '\u{1e41}'..='\u{1e41}', '\u{1e43}'..='\u{1e43}', - '\u{1e45}'..='\u{1e45}', '\u{1e47}'..='\u{1e47}', '\u{1e49}'..='\u{1e49}', - '\u{1e4b}'..='\u{1e4b}', '\u{1e4d}'..='\u{1e4d}', '\u{1e4f}'..='\u{1e4f}', - '\u{1e51}'..='\u{1e51}', '\u{1e53}'..='\u{1e53}', '\u{1e55}'..='\u{1e55}', - '\u{1e57}'..='\u{1e57}', '\u{1e59}'..='\u{1e59}', '\u{1e5b}'..='\u{1e5b}', - '\u{1e5d}'..='\u{1e5d}', '\u{1e5f}'..='\u{1e5f}', '\u{1e61}'..='\u{1e61}', - '\u{1e63}'..='\u{1e63}', '\u{1e65}'..='\u{1e65}', '\u{1e67}'..='\u{1e67}', - '\u{1e69}'..='\u{1e69}', '\u{1e6b}'..='\u{1e6b}', '\u{1e6d}'..='\u{1e6d}', - '\u{1e6f}'..='\u{1e6f}', '\u{1e71}'..='\u{1e71}', '\u{1e73}'..='\u{1e73}', - '\u{1e75}'..='\u{1e75}', '\u{1e77}'..='\u{1e77}', '\u{1e79}'..='\u{1e79}', - '\u{1e7b}'..='\u{1e7b}', '\u{1e7d}'..='\u{1e7d}', '\u{1e7f}'..='\u{1e7f}', - '\u{1e81}'..='\u{1e81}', '\u{1e83}'..='\u{1e83}', '\u{1e85}'..='\u{1e85}', - '\u{1e87}'..='\u{1e87}', '\u{1e89}'..='\u{1e89}', '\u{1e8b}'..='\u{1e8b}', - '\u{1e8d}'..='\u{1e8d}', '\u{1e8f}'..='\u{1e8f}', '\u{1e91}'..='\u{1e91}', - '\u{1e93}'..='\u{1e93}', '\u{1e95}'..='\u{1e9d}', '\u{1e9f}'..='\u{1e9f}', - '\u{1ea1}'..='\u{1ea1}', '\u{1ea3}'..='\u{1ea3}', '\u{1ea5}'..='\u{1ea5}', - '\u{1ea7}'..='\u{1ea7}', '\u{1ea9}'..='\u{1ea9}', '\u{1eab}'..='\u{1eab}', - '\u{1ead}'..='\u{1ead}', '\u{1eaf}'..='\u{1eaf}', '\u{1eb1}'..='\u{1eb1}', - '\u{1eb3}'..='\u{1eb3}', '\u{1eb5}'..='\u{1eb5}', '\u{1eb7}'..='\u{1eb7}', - '\u{1eb9}'..='\u{1eb9}', '\u{1ebb}'..='\u{1ebb}', '\u{1ebd}'..='\u{1ebd}', - '\u{1ebf}'..='\u{1ebf}', '\u{1ec1}'..='\u{1ec1}', '\u{1ec3}'..='\u{1ec3}', - '\u{1ec5}'..='\u{1ec5}', '\u{1ec7}'..='\u{1ec7}', '\u{1ec9}'..='\u{1ec9}', - '\u{1ecb}'..='\u{1ecb}', '\u{1ecd}'..='\u{1ecd}', '\u{1ecf}'..='\u{1ecf}', - '\u{1ed1}'..='\u{1ed1}', '\u{1ed3}'..='\u{1ed3}', '\u{1ed5}'..='\u{1ed5}', - '\u{1ed7}'..='\u{1ed7}', '\u{1ed9}'..='\u{1ed9}', '\u{1edb}'..='\u{1edb}', - '\u{1edd}'..='\u{1edd}', '\u{1edf}'..='\u{1edf}', '\u{1ee1}'..='\u{1ee1}', - '\u{1ee3}'..='\u{1ee3}', '\u{1ee5}'..='\u{1ee5}', '\u{1ee7}'..='\u{1ee7}', - '\u{1ee9}'..='\u{1ee9}', '\u{1eeb}'..='\u{1eeb}', '\u{1eed}'..='\u{1eed}', - '\u{1eef}'..='\u{1eef}', '\u{1ef1}'..='\u{1ef1}', '\u{1ef3}'..='\u{1ef3}', - '\u{1ef5}'..='\u{1ef5}', '\u{1ef7}'..='\u{1ef7}', '\u{1ef9}'..='\u{1ef9}', - '\u{1efb}'..='\u{1efb}', '\u{1efd}'..='\u{1efd}', '\u{1eff}'..='\u{1f07}', - '\u{1f10}'..='\u{1f15}', '\u{1f20}'..='\u{1f27}', '\u{1f30}'..='\u{1f37}', - '\u{1f40}'..='\u{1f45}', '\u{1f50}'..='\u{1f57}', '\u{1f60}'..='\u{1f67}', - '\u{1f70}'..='\u{1f7d}', '\u{1f80}'..='\u{1f87}', '\u{1f90}'..='\u{1f97}', - '\u{1fa0}'..='\u{1fa7}', '\u{1fb0}'..='\u{1fb4}', '\u{1fb6}'..='\u{1fb7}', - '\u{1fbe}'..='\u{1fbe}', '\u{1fc2}'..='\u{1fc4}', '\u{1fc6}'..='\u{1fc7}', - '\u{1fd0}'..='\u{1fd3}', '\u{1fd6}'..='\u{1fd7}', '\u{1fe0}'..='\u{1fe7}', - '\u{1ff2}'..='\u{1ff4}', '\u{1ff6}'..='\u{1ff7}', '\u{2071}'..='\u{2071}', - '\u{207f}'..='\u{207f}', '\u{2090}'..='\u{209c}', '\u{210a}'..='\u{210a}', - '\u{210e}'..='\u{210f}', '\u{2113}'..='\u{2113}', '\u{212f}'..='\u{212f}', - '\u{2134}'..='\u{2134}', '\u{2139}'..='\u{2139}', '\u{213c}'..='\u{213d}', - '\u{2146}'..='\u{2149}', '\u{214e}'..='\u{214e}', '\u{2170}'..='\u{217f}', - 
'\u{2184}'..='\u{2184}', '\u{24d0}'..='\u{24e9}', '\u{2c30}'..='\u{2c5f}', - '\u{2c61}'..='\u{2c61}', '\u{2c65}'..='\u{2c66}', '\u{2c68}'..='\u{2c68}', - '\u{2c6a}'..='\u{2c6a}', '\u{2c6c}'..='\u{2c6c}', '\u{2c71}'..='\u{2c71}', - '\u{2c73}'..='\u{2c74}', '\u{2c76}'..='\u{2c7d}', '\u{2c81}'..='\u{2c81}', - '\u{2c83}'..='\u{2c83}', '\u{2c85}'..='\u{2c85}', '\u{2c87}'..='\u{2c87}', - '\u{2c89}'..='\u{2c89}', '\u{2c8b}'..='\u{2c8b}', '\u{2c8d}'..='\u{2c8d}', - '\u{2c8f}'..='\u{2c8f}', '\u{2c91}'..='\u{2c91}', '\u{2c93}'..='\u{2c93}', - '\u{2c95}'..='\u{2c95}', '\u{2c97}'..='\u{2c97}', '\u{2c99}'..='\u{2c99}', - '\u{2c9b}'..='\u{2c9b}', '\u{2c9d}'..='\u{2c9d}', '\u{2c9f}'..='\u{2c9f}', - '\u{2ca1}'..='\u{2ca1}', '\u{2ca3}'..='\u{2ca3}', '\u{2ca5}'..='\u{2ca5}', - '\u{2ca7}'..='\u{2ca7}', '\u{2ca9}'..='\u{2ca9}', '\u{2cab}'..='\u{2cab}', - '\u{2cad}'..='\u{2cad}', '\u{2caf}'..='\u{2caf}', '\u{2cb1}'..='\u{2cb1}', - '\u{2cb3}'..='\u{2cb3}', '\u{2cb5}'..='\u{2cb5}', '\u{2cb7}'..='\u{2cb7}', - '\u{2cb9}'..='\u{2cb9}', '\u{2cbb}'..='\u{2cbb}', '\u{2cbd}'..='\u{2cbd}', - '\u{2cbf}'..='\u{2cbf}', '\u{2cc1}'..='\u{2cc1}', '\u{2cc3}'..='\u{2cc3}', - '\u{2cc5}'..='\u{2cc5}', '\u{2cc7}'..='\u{2cc7}', '\u{2cc9}'..='\u{2cc9}', - '\u{2ccb}'..='\u{2ccb}', '\u{2ccd}'..='\u{2ccd}', '\u{2ccf}'..='\u{2ccf}', - '\u{2cd1}'..='\u{2cd1}', '\u{2cd3}'..='\u{2cd3}', '\u{2cd5}'..='\u{2cd5}', - '\u{2cd7}'..='\u{2cd7}', '\u{2cd9}'..='\u{2cd9}', '\u{2cdb}'..='\u{2cdb}', - '\u{2cdd}'..='\u{2cdd}', '\u{2cdf}'..='\u{2cdf}', '\u{2ce1}'..='\u{2ce1}', - '\u{2ce3}'..='\u{2ce4}', '\u{2cec}'..='\u{2cec}', '\u{2cee}'..='\u{2cee}', - '\u{2cf3}'..='\u{2cf3}', '\u{2d00}'..='\u{2d25}', '\u{2d27}'..='\u{2d27}', - '\u{2d2d}'..='\u{2d2d}', '\u{a641}'..='\u{a641}', '\u{a643}'..='\u{a643}', - '\u{a645}'..='\u{a645}', '\u{a647}'..='\u{a647}', '\u{a649}'..='\u{a649}', - '\u{a64b}'..='\u{a64b}', '\u{a64d}'..='\u{a64d}', '\u{a64f}'..='\u{a64f}', - '\u{a651}'..='\u{a651}', '\u{a653}'..='\u{a653}', '\u{a655}'..='\u{a655}', - '\u{a657}'..='\u{a657}', '\u{a659}'..='\u{a659}', '\u{a65b}'..='\u{a65b}', - '\u{a65d}'..='\u{a65d}', '\u{a65f}'..='\u{a65f}', '\u{a661}'..='\u{a661}', - '\u{a663}'..='\u{a663}', '\u{a665}'..='\u{a665}', '\u{a667}'..='\u{a667}', - '\u{a669}'..='\u{a669}', '\u{a66b}'..='\u{a66b}', '\u{a66d}'..='\u{a66d}', - '\u{a681}'..='\u{a681}', '\u{a683}'..='\u{a683}', '\u{a685}'..='\u{a685}', - '\u{a687}'..='\u{a687}', '\u{a689}'..='\u{a689}', '\u{a68b}'..='\u{a68b}', - '\u{a68d}'..='\u{a68d}', '\u{a68f}'..='\u{a68f}', '\u{a691}'..='\u{a691}', - '\u{a693}'..='\u{a693}', '\u{a695}'..='\u{a695}', '\u{a697}'..='\u{a697}', - '\u{a699}'..='\u{a699}', '\u{a69b}'..='\u{a69d}', '\u{a723}'..='\u{a723}', - '\u{a725}'..='\u{a725}', '\u{a727}'..='\u{a727}', '\u{a729}'..='\u{a729}', - '\u{a72b}'..='\u{a72b}', '\u{a72d}'..='\u{a72d}', '\u{a72f}'..='\u{a731}', - '\u{a733}'..='\u{a733}', '\u{a735}'..='\u{a735}', '\u{a737}'..='\u{a737}', - '\u{a739}'..='\u{a739}', '\u{a73b}'..='\u{a73b}', '\u{a73d}'..='\u{a73d}', - '\u{a73f}'..='\u{a73f}', '\u{a741}'..='\u{a741}', '\u{a743}'..='\u{a743}', - '\u{a745}'..='\u{a745}', '\u{a747}'..='\u{a747}', '\u{a749}'..='\u{a749}', - '\u{a74b}'..='\u{a74b}', '\u{a74d}'..='\u{a74d}', '\u{a74f}'..='\u{a74f}', - '\u{a751}'..='\u{a751}', '\u{a753}'..='\u{a753}', '\u{a755}'..='\u{a755}', - '\u{a757}'..='\u{a757}', '\u{a759}'..='\u{a759}', '\u{a75b}'..='\u{a75b}', - '\u{a75d}'..='\u{a75d}', '\u{a75f}'..='\u{a75f}', '\u{a761}'..='\u{a761}', - '\u{a763}'..='\u{a763}', '\u{a765}'..='\u{a765}', '\u{a767}'..='\u{a767}', - 
'\u{a769}'..='\u{a769}', '\u{a76b}'..='\u{a76b}', '\u{a76d}'..='\u{a76d}',
-    '\u{a76f}'..='\u{a778}', '\u{a77a}'..='\u{a77a}', '\u{a77c}'..='\u{a77c}',
-    '\u{a77f}'..='\u{a77f}', '\u{a781}'..='\u{a781}', '\u{a783}'..='\u{a783}',
-    '\u{a785}'..='\u{a785}', '\u{a787}'..='\u{a787}', '\u{a78c}'..='\u{a78c}',
-    '\u{a78e}'..='\u{a78e}', '\u{a791}'..='\u{a791}', '\u{a793}'..='\u{a795}',
-    '\u{a797}'..='\u{a797}', '\u{a799}'..='\u{a799}', '\u{a79b}'..='\u{a79b}',
-    '\u{a79d}'..='\u{a79d}', '\u{a79f}'..='\u{a79f}', '\u{a7a1}'..='\u{a7a1}',
-    '\u{a7a3}'..='\u{a7a3}', '\u{a7a5}'..='\u{a7a5}', '\u{a7a7}'..='\u{a7a7}',
-    '\u{a7a9}'..='\u{a7a9}', '\u{a7af}'..='\u{a7af}', '\u{a7b5}'..='\u{a7b5}',
-    '\u{a7b7}'..='\u{a7b7}', '\u{a7b9}'..='\u{a7b9}', '\u{a7bb}'..='\u{a7bb}',
-    '\u{a7bd}'..='\u{a7bd}', '\u{a7bf}'..='\u{a7bf}', '\u{a7c1}'..='\u{a7c1}',
-    '\u{a7c3}'..='\u{a7c3}', '\u{a7c8}'..='\u{a7c8}', '\u{a7ca}'..='\u{a7ca}',
-    '\u{a7cd}'..='\u{a7cd}', '\u{a7cf}'..='\u{a7cf}', '\u{a7d1}'..='\u{a7d1}',
-    '\u{a7d3}'..='\u{a7d3}', '\u{a7d5}'..='\u{a7d5}', '\u{a7d7}'..='\u{a7d7}',
-    '\u{a7d9}'..='\u{a7d9}', '\u{a7db}'..='\u{a7db}', '\u{a7f1}'..='\u{a7f4}',
-    '\u{a7f6}'..='\u{a7f6}', '\u{a7f8}'..='\u{a7fa}', '\u{ab30}'..='\u{ab5a}',
-    '\u{ab5c}'..='\u{ab69}', '\u{ab70}'..='\u{abbf}', '\u{fb00}'..='\u{fb06}',
-    '\u{fb13}'..='\u{fb17}', '\u{ff41}'..='\u{ff5a}', '\u{10428}'..='\u{1044f}',
-    '\u{104d8}'..='\u{104fb}', '\u{10597}'..='\u{105a1}', '\u{105a3}'..='\u{105b1}',
-    '\u{105b3}'..='\u{105b9}', '\u{105bb}'..='\u{105bc}', '\u{10780}'..='\u{10780}',
-    '\u{10783}'..='\u{10785}', '\u{10787}'..='\u{107b0}', '\u{107b2}'..='\u{107ba}',
-    '\u{10cc0}'..='\u{10cf2}', '\u{10d70}'..='\u{10d85}', '\u{118c0}'..='\u{118df}',
-    '\u{16e60}'..='\u{16e7f}', '\u{16ebb}'..='\u{16ed3}', '\u{1d41a}'..='\u{1d433}',
-    '\u{1d44e}'..='\u{1d454}', '\u{1d456}'..='\u{1d467}', '\u{1d482}'..='\u{1d49b}',
-    '\u{1d4b6}'..='\u{1d4b9}', '\u{1d4bb}'..='\u{1d4bb}', '\u{1d4bd}'..='\u{1d4c3}',
-    '\u{1d4c5}'..='\u{1d4cf}', '\u{1d4ea}'..='\u{1d503}', '\u{1d51e}'..='\u{1d537}',
-    '\u{1d552}'..='\u{1d56b}', '\u{1d586}'..='\u{1d59f}', '\u{1d5ba}'..='\u{1d5d3}',
-    '\u{1d5ee}'..='\u{1d607}', '\u{1d622}'..='\u{1d63b}', '\u{1d656}'..='\u{1d66f}',
-    '\u{1d68a}'..='\u{1d6a5}', '\u{1d6c2}'..='\u{1d6da}', '\u{1d6dc}'..='\u{1d6e1}',
-    '\u{1d6fc}'..='\u{1d714}', '\u{1d716}'..='\u{1d71b}', '\u{1d736}'..='\u{1d74e}',
-    '\u{1d750}'..='\u{1d755}', '\u{1d770}'..='\u{1d788}', '\u{1d78a}'..='\u{1d78f}',
-    '\u{1d7aa}'..='\u{1d7c2}', '\u{1d7c4}'..='\u{1d7c9}', '\u{1d7cb}'..='\u{1d7cb}',
-    '\u{1df00}'..='\u{1df09}', '\u{1df0b}'..='\u{1df1e}', '\u{1df25}'..='\u{1df2a}',
-    '\u{1e030}'..='\u{1e06d}', '\u{1e922}'..='\u{1e943}',
-];
-
-#[rustfmt::skip]
-pub(super) static N: &[RangeInclusive<char>; 145] = &[
-    '\u{b2}'..='\u{b3}', '\u{b9}'..='\u{b9}', '\u{bc}'..='\u{be}', '\u{660}'..='\u{669}',
-    '\u{6f0}'..='\u{6f9}', '\u{7c0}'..='\u{7c9}', '\u{966}'..='\u{96f}', '\u{9e6}'..='\u{9ef}',
-    '\u{9f4}'..='\u{9f9}', '\u{a66}'..='\u{a6f}', '\u{ae6}'..='\u{aef}', '\u{b66}'..='\u{b6f}',
-    '\u{b72}'..='\u{b77}', '\u{be6}'..='\u{bf2}', '\u{c66}'..='\u{c6f}', '\u{c78}'..='\u{c7e}',
-    '\u{ce6}'..='\u{cef}', '\u{d58}'..='\u{d5e}', '\u{d66}'..='\u{d78}', '\u{de6}'..='\u{def}',
-    '\u{e50}'..='\u{e59}', '\u{ed0}'..='\u{ed9}', '\u{f20}'..='\u{f33}',
-    '\u{1040}'..='\u{1049}', '\u{1090}'..='\u{1099}', '\u{1369}'..='\u{137c}',
-    '\u{16ee}'..='\u{16f0}', '\u{17e0}'..='\u{17e9}', '\u{17f0}'..='\u{17f9}',
-    '\u{1810}'..='\u{1819}', '\u{1946}'..='\u{194f}', '\u{19d0}'..='\u{19da}',
-    '\u{1a80}'..='\u{1a89}',
'\u{1a90}'..='\u{1a99}', '\u{1b50}'..='\u{1b59}',
-    '\u{1bb0}'..='\u{1bb9}', '\u{1c40}'..='\u{1c49}', '\u{1c50}'..='\u{1c59}',
-    '\u{2070}'..='\u{2070}', '\u{2074}'..='\u{2079}', '\u{2080}'..='\u{2089}',
-    '\u{2150}'..='\u{2182}', '\u{2185}'..='\u{2189}', '\u{2460}'..='\u{249b}',
-    '\u{24ea}'..='\u{24ff}', '\u{2776}'..='\u{2793}', '\u{2cfd}'..='\u{2cfd}',
-    '\u{3007}'..='\u{3007}', '\u{3021}'..='\u{3029}', '\u{3038}'..='\u{303a}',
-    '\u{3192}'..='\u{3195}', '\u{3220}'..='\u{3229}', '\u{3248}'..='\u{324f}',
-    '\u{3251}'..='\u{325f}', '\u{3280}'..='\u{3289}', '\u{32b1}'..='\u{32bf}',
-    '\u{a620}'..='\u{a629}', '\u{a6e6}'..='\u{a6ef}', '\u{a830}'..='\u{a835}',
-    '\u{a8d0}'..='\u{a8d9}', '\u{a900}'..='\u{a909}', '\u{a9d0}'..='\u{a9d9}',
-    '\u{a9f0}'..='\u{a9f9}', '\u{aa50}'..='\u{aa59}', '\u{abf0}'..='\u{abf9}',
-    '\u{ff10}'..='\u{ff19}', '\u{10107}'..='\u{10133}', '\u{10140}'..='\u{10178}',
-    '\u{1018a}'..='\u{1018b}', '\u{102e1}'..='\u{102fb}', '\u{10320}'..='\u{10323}',
-    '\u{10341}'..='\u{10341}', '\u{1034a}'..='\u{1034a}', '\u{103d1}'..='\u{103d5}',
-    '\u{104a0}'..='\u{104a9}', '\u{10858}'..='\u{1085f}', '\u{10879}'..='\u{1087f}',
-    '\u{108a7}'..='\u{108af}', '\u{108fb}'..='\u{108ff}', '\u{10916}'..='\u{1091b}',
-    '\u{109bc}'..='\u{109bd}', '\u{109c0}'..='\u{109cf}', '\u{109d2}'..='\u{109ff}',
-    '\u{10a40}'..='\u{10a48}', '\u{10a7d}'..='\u{10a7e}', '\u{10a9d}'..='\u{10a9f}',
-    '\u{10aeb}'..='\u{10aef}', '\u{10b58}'..='\u{10b5f}', '\u{10b78}'..='\u{10b7f}',
-    '\u{10ba9}'..='\u{10baf}', '\u{10cfa}'..='\u{10cff}', '\u{10d30}'..='\u{10d39}',
-    '\u{10d40}'..='\u{10d49}', '\u{10e60}'..='\u{10e7e}', '\u{10f1d}'..='\u{10f26}',
-    '\u{10f51}'..='\u{10f54}', '\u{10fc5}'..='\u{10fcb}', '\u{11052}'..='\u{1106f}',
-    '\u{110f0}'..='\u{110f9}', '\u{11136}'..='\u{1113f}', '\u{111d0}'..='\u{111d9}',
-    '\u{111e1}'..='\u{111f4}', '\u{112f0}'..='\u{112f9}', '\u{11450}'..='\u{11459}',
-    '\u{114d0}'..='\u{114d9}', '\u{11650}'..='\u{11659}', '\u{116c0}'..='\u{116c9}',
-    '\u{116d0}'..='\u{116e3}', '\u{11730}'..='\u{1173b}', '\u{118e0}'..='\u{118f2}',
-    '\u{11950}'..='\u{11959}', '\u{11bf0}'..='\u{11bf9}', '\u{11c50}'..='\u{11c6c}',
-    '\u{11d50}'..='\u{11d59}', '\u{11da0}'..='\u{11da9}', '\u{11de0}'..='\u{11de9}',
-    '\u{11f50}'..='\u{11f59}', '\u{11fc0}'..='\u{11fd4}', '\u{12400}'..='\u{1246e}',
-    '\u{16130}'..='\u{16139}', '\u{16a60}'..='\u{16a69}', '\u{16ac0}'..='\u{16ac9}',
-    '\u{16b50}'..='\u{16b59}', '\u{16b5b}'..='\u{16b61}', '\u{16d70}'..='\u{16d79}',
-    '\u{16e80}'..='\u{16e96}', '\u{16ff4}'..='\u{16ff6}', '\u{1ccf0}'..='\u{1ccf9}',
-    '\u{1d2c0}'..='\u{1d2d3}', '\u{1d2e0}'..='\u{1d2f3}', '\u{1d360}'..='\u{1d378}',
-    '\u{1d7ce}'..='\u{1d7ff}', '\u{1e140}'..='\u{1e149}', '\u{1e2f0}'..='\u{1e2f9}',
-    '\u{1e4f0}'..='\u{1e4f9}', '\u{1e5f1}'..='\u{1e5fa}', '\u{1e8c7}'..='\u{1e8cf}',
-    '\u{1e950}'..='\u{1e959}', '\u{1ec71}'..='\u{1ecab}', '\u{1ecad}'..='\u{1ecaf}',
-    '\u{1ecb1}'..='\u{1ecb4}', '\u{1ed01}'..='\u{1ed2d}', '\u{1ed2f}'..='\u{1ed3d}',
-    '\u{1f100}'..='\u{1f10c}', '\u{1fbf0}'..='\u{1fbf9}',
-];
-
-#[rustfmt::skip]
-pub(super) static UPPERCASE: &[RangeInclusive<char>; 659] = &[
-    '\u{c0}'..='\u{d6}', '\u{d8}'..='\u{de}', '\u{100}'..='\u{100}', '\u{102}'..='\u{102}',
-    '\u{104}'..='\u{104}', '\u{106}'..='\u{106}', '\u{108}'..='\u{108}', '\u{10a}'..='\u{10a}',
-    '\u{10c}'..='\u{10c}', '\u{10e}'..='\u{10e}', '\u{110}'..='\u{110}', '\u{112}'..='\u{112}',
-    '\u{114}'..='\u{114}', '\u{116}'..='\u{116}', '\u{118}'..='\u{118}', '\u{11a}'..='\u{11a}',
-    '\u{11c}'..='\u{11c}', '\u{11e}'..='\u{11e}',
'\u{120}'..='\u{120}', '\u{122}'..='\u{122}', - '\u{124}'..='\u{124}', '\u{126}'..='\u{126}', '\u{128}'..='\u{128}', '\u{12a}'..='\u{12a}', - '\u{12c}'..='\u{12c}', '\u{12e}'..='\u{12e}', '\u{130}'..='\u{130}', '\u{132}'..='\u{132}', - '\u{134}'..='\u{134}', '\u{136}'..='\u{136}', '\u{139}'..='\u{139}', '\u{13b}'..='\u{13b}', - '\u{13d}'..='\u{13d}', '\u{13f}'..='\u{13f}', '\u{141}'..='\u{141}', '\u{143}'..='\u{143}', - '\u{145}'..='\u{145}', '\u{147}'..='\u{147}', '\u{14a}'..='\u{14a}', '\u{14c}'..='\u{14c}', - '\u{14e}'..='\u{14e}', '\u{150}'..='\u{150}', '\u{152}'..='\u{152}', '\u{154}'..='\u{154}', - '\u{156}'..='\u{156}', '\u{158}'..='\u{158}', '\u{15a}'..='\u{15a}', '\u{15c}'..='\u{15c}', - '\u{15e}'..='\u{15e}', '\u{160}'..='\u{160}', '\u{162}'..='\u{162}', '\u{164}'..='\u{164}', - '\u{166}'..='\u{166}', '\u{168}'..='\u{168}', '\u{16a}'..='\u{16a}', '\u{16c}'..='\u{16c}', - '\u{16e}'..='\u{16e}', '\u{170}'..='\u{170}', '\u{172}'..='\u{172}', '\u{174}'..='\u{174}', - '\u{176}'..='\u{176}', '\u{178}'..='\u{179}', '\u{17b}'..='\u{17b}', '\u{17d}'..='\u{17d}', - '\u{181}'..='\u{182}', '\u{184}'..='\u{184}', '\u{186}'..='\u{187}', '\u{189}'..='\u{18b}', - '\u{18e}'..='\u{191}', '\u{193}'..='\u{194}', '\u{196}'..='\u{198}', '\u{19c}'..='\u{19d}', - '\u{19f}'..='\u{1a0}', '\u{1a2}'..='\u{1a2}', '\u{1a4}'..='\u{1a4}', '\u{1a6}'..='\u{1a7}', - '\u{1a9}'..='\u{1a9}', '\u{1ac}'..='\u{1ac}', '\u{1ae}'..='\u{1af}', '\u{1b1}'..='\u{1b3}', - '\u{1b5}'..='\u{1b5}', '\u{1b7}'..='\u{1b8}', '\u{1bc}'..='\u{1bc}', '\u{1c4}'..='\u{1c4}', - '\u{1c7}'..='\u{1c7}', '\u{1ca}'..='\u{1ca}', '\u{1cd}'..='\u{1cd}', '\u{1cf}'..='\u{1cf}', - '\u{1d1}'..='\u{1d1}', '\u{1d3}'..='\u{1d3}', '\u{1d5}'..='\u{1d5}', '\u{1d7}'..='\u{1d7}', - '\u{1d9}'..='\u{1d9}', '\u{1db}'..='\u{1db}', '\u{1de}'..='\u{1de}', '\u{1e0}'..='\u{1e0}', - '\u{1e2}'..='\u{1e2}', '\u{1e4}'..='\u{1e4}', '\u{1e6}'..='\u{1e6}', '\u{1e8}'..='\u{1e8}', - '\u{1ea}'..='\u{1ea}', '\u{1ec}'..='\u{1ec}', '\u{1ee}'..='\u{1ee}', '\u{1f1}'..='\u{1f1}', - '\u{1f4}'..='\u{1f4}', '\u{1f6}'..='\u{1f8}', '\u{1fa}'..='\u{1fa}', '\u{1fc}'..='\u{1fc}', - '\u{1fe}'..='\u{1fe}', '\u{200}'..='\u{200}', '\u{202}'..='\u{202}', '\u{204}'..='\u{204}', - '\u{206}'..='\u{206}', '\u{208}'..='\u{208}', '\u{20a}'..='\u{20a}', '\u{20c}'..='\u{20c}', - '\u{20e}'..='\u{20e}', '\u{210}'..='\u{210}', '\u{212}'..='\u{212}', '\u{214}'..='\u{214}', - '\u{216}'..='\u{216}', '\u{218}'..='\u{218}', '\u{21a}'..='\u{21a}', '\u{21c}'..='\u{21c}', - '\u{21e}'..='\u{21e}', '\u{220}'..='\u{220}', '\u{222}'..='\u{222}', '\u{224}'..='\u{224}', - '\u{226}'..='\u{226}', '\u{228}'..='\u{228}', '\u{22a}'..='\u{22a}', '\u{22c}'..='\u{22c}', - '\u{22e}'..='\u{22e}', '\u{230}'..='\u{230}', '\u{232}'..='\u{232}', '\u{23a}'..='\u{23b}', - '\u{23d}'..='\u{23e}', '\u{241}'..='\u{241}', '\u{243}'..='\u{246}', '\u{248}'..='\u{248}', - '\u{24a}'..='\u{24a}', '\u{24c}'..='\u{24c}', '\u{24e}'..='\u{24e}', '\u{370}'..='\u{370}', - '\u{372}'..='\u{372}', '\u{376}'..='\u{376}', '\u{37f}'..='\u{37f}', '\u{386}'..='\u{386}', - '\u{388}'..='\u{38a}', '\u{38c}'..='\u{38c}', '\u{38e}'..='\u{38f}', '\u{391}'..='\u{3a1}', - '\u{3a3}'..='\u{3ab}', '\u{3cf}'..='\u{3cf}', '\u{3d2}'..='\u{3d4}', '\u{3d8}'..='\u{3d8}', - '\u{3da}'..='\u{3da}', '\u{3dc}'..='\u{3dc}', '\u{3de}'..='\u{3de}', '\u{3e0}'..='\u{3e0}', - '\u{3e2}'..='\u{3e2}', '\u{3e4}'..='\u{3e4}', '\u{3e6}'..='\u{3e6}', '\u{3e8}'..='\u{3e8}', - '\u{3ea}'..='\u{3ea}', '\u{3ec}'..='\u{3ec}', '\u{3ee}'..='\u{3ee}', '\u{3f4}'..='\u{3f4}', - '\u{3f7}'..='\u{3f7}', 
'\u{3f9}'..='\u{3fa}', '\u{3fd}'..='\u{42f}', '\u{460}'..='\u{460}', - '\u{462}'..='\u{462}', '\u{464}'..='\u{464}', '\u{466}'..='\u{466}', '\u{468}'..='\u{468}', - '\u{46a}'..='\u{46a}', '\u{46c}'..='\u{46c}', '\u{46e}'..='\u{46e}', '\u{470}'..='\u{470}', - '\u{472}'..='\u{472}', '\u{474}'..='\u{474}', '\u{476}'..='\u{476}', '\u{478}'..='\u{478}', - '\u{47a}'..='\u{47a}', '\u{47c}'..='\u{47c}', '\u{47e}'..='\u{47e}', '\u{480}'..='\u{480}', - '\u{48a}'..='\u{48a}', '\u{48c}'..='\u{48c}', '\u{48e}'..='\u{48e}', '\u{490}'..='\u{490}', - '\u{492}'..='\u{492}', '\u{494}'..='\u{494}', '\u{496}'..='\u{496}', '\u{498}'..='\u{498}', - '\u{49a}'..='\u{49a}', '\u{49c}'..='\u{49c}', '\u{49e}'..='\u{49e}', '\u{4a0}'..='\u{4a0}', - '\u{4a2}'..='\u{4a2}', '\u{4a4}'..='\u{4a4}', '\u{4a6}'..='\u{4a6}', '\u{4a8}'..='\u{4a8}', - '\u{4aa}'..='\u{4aa}', '\u{4ac}'..='\u{4ac}', '\u{4ae}'..='\u{4ae}', '\u{4b0}'..='\u{4b0}', - '\u{4b2}'..='\u{4b2}', '\u{4b4}'..='\u{4b4}', '\u{4b6}'..='\u{4b6}', '\u{4b8}'..='\u{4b8}', - '\u{4ba}'..='\u{4ba}', '\u{4bc}'..='\u{4bc}', '\u{4be}'..='\u{4be}', '\u{4c0}'..='\u{4c1}', - '\u{4c3}'..='\u{4c3}', '\u{4c5}'..='\u{4c5}', '\u{4c7}'..='\u{4c7}', '\u{4c9}'..='\u{4c9}', - '\u{4cb}'..='\u{4cb}', '\u{4cd}'..='\u{4cd}', '\u{4d0}'..='\u{4d0}', '\u{4d2}'..='\u{4d2}', - '\u{4d4}'..='\u{4d4}', '\u{4d6}'..='\u{4d6}', '\u{4d8}'..='\u{4d8}', '\u{4da}'..='\u{4da}', - '\u{4dc}'..='\u{4dc}', '\u{4de}'..='\u{4de}', '\u{4e0}'..='\u{4e0}', '\u{4e2}'..='\u{4e2}', - '\u{4e4}'..='\u{4e4}', '\u{4e6}'..='\u{4e6}', '\u{4e8}'..='\u{4e8}', '\u{4ea}'..='\u{4ea}', - '\u{4ec}'..='\u{4ec}', '\u{4ee}'..='\u{4ee}', '\u{4f0}'..='\u{4f0}', '\u{4f2}'..='\u{4f2}', - '\u{4f4}'..='\u{4f4}', '\u{4f6}'..='\u{4f6}', '\u{4f8}'..='\u{4f8}', '\u{4fa}'..='\u{4fa}', - '\u{4fc}'..='\u{4fc}', '\u{4fe}'..='\u{4fe}', '\u{500}'..='\u{500}', '\u{502}'..='\u{502}', - '\u{504}'..='\u{504}', '\u{506}'..='\u{506}', '\u{508}'..='\u{508}', '\u{50a}'..='\u{50a}', - '\u{50c}'..='\u{50c}', '\u{50e}'..='\u{50e}', '\u{510}'..='\u{510}', '\u{512}'..='\u{512}', - '\u{514}'..='\u{514}', '\u{516}'..='\u{516}', '\u{518}'..='\u{518}', '\u{51a}'..='\u{51a}', - '\u{51c}'..='\u{51c}', '\u{51e}'..='\u{51e}', '\u{520}'..='\u{520}', '\u{522}'..='\u{522}', - '\u{524}'..='\u{524}', '\u{526}'..='\u{526}', '\u{528}'..='\u{528}', '\u{52a}'..='\u{52a}', - '\u{52c}'..='\u{52c}', '\u{52e}'..='\u{52e}', '\u{531}'..='\u{556}', - '\u{10a0}'..='\u{10c5}', '\u{10c7}'..='\u{10c7}', '\u{10cd}'..='\u{10cd}', - '\u{13a0}'..='\u{13f5}', '\u{1c89}'..='\u{1c89}', '\u{1c90}'..='\u{1cba}', - '\u{1cbd}'..='\u{1cbf}', '\u{1e00}'..='\u{1e00}', '\u{1e02}'..='\u{1e02}', - '\u{1e04}'..='\u{1e04}', '\u{1e06}'..='\u{1e06}', '\u{1e08}'..='\u{1e08}', - '\u{1e0a}'..='\u{1e0a}', '\u{1e0c}'..='\u{1e0c}', '\u{1e0e}'..='\u{1e0e}', - '\u{1e10}'..='\u{1e10}', '\u{1e12}'..='\u{1e12}', '\u{1e14}'..='\u{1e14}', - '\u{1e16}'..='\u{1e16}', '\u{1e18}'..='\u{1e18}', '\u{1e1a}'..='\u{1e1a}', - '\u{1e1c}'..='\u{1e1c}', '\u{1e1e}'..='\u{1e1e}', '\u{1e20}'..='\u{1e20}', - '\u{1e22}'..='\u{1e22}', '\u{1e24}'..='\u{1e24}', '\u{1e26}'..='\u{1e26}', - '\u{1e28}'..='\u{1e28}', '\u{1e2a}'..='\u{1e2a}', '\u{1e2c}'..='\u{1e2c}', - '\u{1e2e}'..='\u{1e2e}', '\u{1e30}'..='\u{1e30}', '\u{1e32}'..='\u{1e32}', - '\u{1e34}'..='\u{1e34}', '\u{1e36}'..='\u{1e36}', '\u{1e38}'..='\u{1e38}', - '\u{1e3a}'..='\u{1e3a}', '\u{1e3c}'..='\u{1e3c}', '\u{1e3e}'..='\u{1e3e}', - '\u{1e40}'..='\u{1e40}', '\u{1e42}'..='\u{1e42}', '\u{1e44}'..='\u{1e44}', - '\u{1e46}'..='\u{1e46}', '\u{1e48}'..='\u{1e48}', '\u{1e4a}'..='\u{1e4a}', - 
'\u{1e4c}'..='\u{1e4c}', '\u{1e4e}'..='\u{1e4e}', '\u{1e50}'..='\u{1e50}', - '\u{1e52}'..='\u{1e52}', '\u{1e54}'..='\u{1e54}', '\u{1e56}'..='\u{1e56}', - '\u{1e58}'..='\u{1e58}', '\u{1e5a}'..='\u{1e5a}', '\u{1e5c}'..='\u{1e5c}', - '\u{1e5e}'..='\u{1e5e}', '\u{1e60}'..='\u{1e60}', '\u{1e62}'..='\u{1e62}', - '\u{1e64}'..='\u{1e64}', '\u{1e66}'..='\u{1e66}', '\u{1e68}'..='\u{1e68}', - '\u{1e6a}'..='\u{1e6a}', '\u{1e6c}'..='\u{1e6c}', '\u{1e6e}'..='\u{1e6e}', - '\u{1e70}'..='\u{1e70}', '\u{1e72}'..='\u{1e72}', '\u{1e74}'..='\u{1e74}', - '\u{1e76}'..='\u{1e76}', '\u{1e78}'..='\u{1e78}', '\u{1e7a}'..='\u{1e7a}', - '\u{1e7c}'..='\u{1e7c}', '\u{1e7e}'..='\u{1e7e}', '\u{1e80}'..='\u{1e80}', - '\u{1e82}'..='\u{1e82}', '\u{1e84}'..='\u{1e84}', '\u{1e86}'..='\u{1e86}', - '\u{1e88}'..='\u{1e88}', '\u{1e8a}'..='\u{1e8a}', '\u{1e8c}'..='\u{1e8c}', - '\u{1e8e}'..='\u{1e8e}', '\u{1e90}'..='\u{1e90}', '\u{1e92}'..='\u{1e92}', - '\u{1e94}'..='\u{1e94}', '\u{1e9e}'..='\u{1e9e}', '\u{1ea0}'..='\u{1ea0}', - '\u{1ea2}'..='\u{1ea2}', '\u{1ea4}'..='\u{1ea4}', '\u{1ea6}'..='\u{1ea6}', - '\u{1ea8}'..='\u{1ea8}', '\u{1eaa}'..='\u{1eaa}', '\u{1eac}'..='\u{1eac}', - '\u{1eae}'..='\u{1eae}', '\u{1eb0}'..='\u{1eb0}', '\u{1eb2}'..='\u{1eb2}', - '\u{1eb4}'..='\u{1eb4}', '\u{1eb6}'..='\u{1eb6}', '\u{1eb8}'..='\u{1eb8}', - '\u{1eba}'..='\u{1eba}', '\u{1ebc}'..='\u{1ebc}', '\u{1ebe}'..='\u{1ebe}', - '\u{1ec0}'..='\u{1ec0}', '\u{1ec2}'..='\u{1ec2}', '\u{1ec4}'..='\u{1ec4}', - '\u{1ec6}'..='\u{1ec6}', '\u{1ec8}'..='\u{1ec8}', '\u{1eca}'..='\u{1eca}', - '\u{1ecc}'..='\u{1ecc}', '\u{1ece}'..='\u{1ece}', '\u{1ed0}'..='\u{1ed0}', - '\u{1ed2}'..='\u{1ed2}', '\u{1ed4}'..='\u{1ed4}', '\u{1ed6}'..='\u{1ed6}', - '\u{1ed8}'..='\u{1ed8}', '\u{1eda}'..='\u{1eda}', '\u{1edc}'..='\u{1edc}', - '\u{1ede}'..='\u{1ede}', '\u{1ee0}'..='\u{1ee0}', '\u{1ee2}'..='\u{1ee2}', - '\u{1ee4}'..='\u{1ee4}', '\u{1ee6}'..='\u{1ee6}', '\u{1ee8}'..='\u{1ee8}', - '\u{1eea}'..='\u{1eea}', '\u{1eec}'..='\u{1eec}', '\u{1eee}'..='\u{1eee}', - '\u{1ef0}'..='\u{1ef0}', '\u{1ef2}'..='\u{1ef2}', '\u{1ef4}'..='\u{1ef4}', - '\u{1ef6}'..='\u{1ef6}', '\u{1ef8}'..='\u{1ef8}', '\u{1efa}'..='\u{1efa}', - '\u{1efc}'..='\u{1efc}', '\u{1efe}'..='\u{1efe}', '\u{1f08}'..='\u{1f0f}', - '\u{1f18}'..='\u{1f1d}', '\u{1f28}'..='\u{1f2f}', '\u{1f38}'..='\u{1f3f}', - '\u{1f48}'..='\u{1f4d}', '\u{1f59}'..='\u{1f59}', '\u{1f5b}'..='\u{1f5b}', - '\u{1f5d}'..='\u{1f5d}', '\u{1f5f}'..='\u{1f5f}', '\u{1f68}'..='\u{1f6f}', - '\u{1fb8}'..='\u{1fbb}', '\u{1fc8}'..='\u{1fcb}', '\u{1fd8}'..='\u{1fdb}', - '\u{1fe8}'..='\u{1fec}', '\u{1ff8}'..='\u{1ffb}', '\u{2102}'..='\u{2102}', - '\u{2107}'..='\u{2107}', '\u{210b}'..='\u{210d}', '\u{2110}'..='\u{2112}', - '\u{2115}'..='\u{2115}', '\u{2119}'..='\u{211d}', '\u{2124}'..='\u{2124}', - '\u{2126}'..='\u{2126}', '\u{2128}'..='\u{2128}', '\u{212a}'..='\u{212d}', - '\u{2130}'..='\u{2133}', '\u{213e}'..='\u{213f}', '\u{2145}'..='\u{2145}', - '\u{2160}'..='\u{216f}', '\u{2183}'..='\u{2183}', '\u{24b6}'..='\u{24cf}', - '\u{2c00}'..='\u{2c2f}', '\u{2c60}'..='\u{2c60}', '\u{2c62}'..='\u{2c64}', - '\u{2c67}'..='\u{2c67}', '\u{2c69}'..='\u{2c69}', '\u{2c6b}'..='\u{2c6b}', - '\u{2c6d}'..='\u{2c70}', '\u{2c72}'..='\u{2c72}', '\u{2c75}'..='\u{2c75}', - '\u{2c7e}'..='\u{2c80}', '\u{2c82}'..='\u{2c82}', '\u{2c84}'..='\u{2c84}', - '\u{2c86}'..='\u{2c86}', '\u{2c88}'..='\u{2c88}', '\u{2c8a}'..='\u{2c8a}', - '\u{2c8c}'..='\u{2c8c}', '\u{2c8e}'..='\u{2c8e}', '\u{2c90}'..='\u{2c90}', - '\u{2c92}'..='\u{2c92}', '\u{2c94}'..='\u{2c94}', '\u{2c96}'..='\u{2c96}', - 
'\u{2c98}'..='\u{2c98}', '\u{2c9a}'..='\u{2c9a}', '\u{2c9c}'..='\u{2c9c}', - '\u{2c9e}'..='\u{2c9e}', '\u{2ca0}'..='\u{2ca0}', '\u{2ca2}'..='\u{2ca2}', - '\u{2ca4}'..='\u{2ca4}', '\u{2ca6}'..='\u{2ca6}', '\u{2ca8}'..='\u{2ca8}', - '\u{2caa}'..='\u{2caa}', '\u{2cac}'..='\u{2cac}', '\u{2cae}'..='\u{2cae}', - '\u{2cb0}'..='\u{2cb0}', '\u{2cb2}'..='\u{2cb2}', '\u{2cb4}'..='\u{2cb4}', - '\u{2cb6}'..='\u{2cb6}', '\u{2cb8}'..='\u{2cb8}', '\u{2cba}'..='\u{2cba}', - '\u{2cbc}'..='\u{2cbc}', '\u{2cbe}'..='\u{2cbe}', '\u{2cc0}'..='\u{2cc0}', - '\u{2cc2}'..='\u{2cc2}', '\u{2cc4}'..='\u{2cc4}', '\u{2cc6}'..='\u{2cc6}', - '\u{2cc8}'..='\u{2cc8}', '\u{2cca}'..='\u{2cca}', '\u{2ccc}'..='\u{2ccc}', - '\u{2cce}'..='\u{2cce}', '\u{2cd0}'..='\u{2cd0}', '\u{2cd2}'..='\u{2cd2}', - '\u{2cd4}'..='\u{2cd4}', '\u{2cd6}'..='\u{2cd6}', '\u{2cd8}'..='\u{2cd8}', - '\u{2cda}'..='\u{2cda}', '\u{2cdc}'..='\u{2cdc}', '\u{2cde}'..='\u{2cde}', - '\u{2ce0}'..='\u{2ce0}', '\u{2ce2}'..='\u{2ce2}', '\u{2ceb}'..='\u{2ceb}', - '\u{2ced}'..='\u{2ced}', '\u{2cf2}'..='\u{2cf2}', '\u{a640}'..='\u{a640}', - '\u{a642}'..='\u{a642}', '\u{a644}'..='\u{a644}', '\u{a646}'..='\u{a646}', - '\u{a648}'..='\u{a648}', '\u{a64a}'..='\u{a64a}', '\u{a64c}'..='\u{a64c}', - '\u{a64e}'..='\u{a64e}', '\u{a650}'..='\u{a650}', '\u{a652}'..='\u{a652}', - '\u{a654}'..='\u{a654}', '\u{a656}'..='\u{a656}', '\u{a658}'..='\u{a658}', - '\u{a65a}'..='\u{a65a}', '\u{a65c}'..='\u{a65c}', '\u{a65e}'..='\u{a65e}', - '\u{a660}'..='\u{a660}', '\u{a662}'..='\u{a662}', '\u{a664}'..='\u{a664}', - '\u{a666}'..='\u{a666}', '\u{a668}'..='\u{a668}', '\u{a66a}'..='\u{a66a}', - '\u{a66c}'..='\u{a66c}', '\u{a680}'..='\u{a680}', '\u{a682}'..='\u{a682}', - '\u{a684}'..='\u{a684}', '\u{a686}'..='\u{a686}', '\u{a688}'..='\u{a688}', - '\u{a68a}'..='\u{a68a}', '\u{a68c}'..='\u{a68c}', '\u{a68e}'..='\u{a68e}', - '\u{a690}'..='\u{a690}', '\u{a692}'..='\u{a692}', '\u{a694}'..='\u{a694}', - '\u{a696}'..='\u{a696}', '\u{a698}'..='\u{a698}', '\u{a69a}'..='\u{a69a}', - '\u{a722}'..='\u{a722}', '\u{a724}'..='\u{a724}', '\u{a726}'..='\u{a726}', - '\u{a728}'..='\u{a728}', '\u{a72a}'..='\u{a72a}', '\u{a72c}'..='\u{a72c}', - '\u{a72e}'..='\u{a72e}', '\u{a732}'..='\u{a732}', '\u{a734}'..='\u{a734}', - '\u{a736}'..='\u{a736}', '\u{a738}'..='\u{a738}', '\u{a73a}'..='\u{a73a}', - '\u{a73c}'..='\u{a73c}', '\u{a73e}'..='\u{a73e}', '\u{a740}'..='\u{a740}', - '\u{a742}'..='\u{a742}', '\u{a744}'..='\u{a744}', '\u{a746}'..='\u{a746}', - '\u{a748}'..='\u{a748}', '\u{a74a}'..='\u{a74a}', '\u{a74c}'..='\u{a74c}', - '\u{a74e}'..='\u{a74e}', '\u{a750}'..='\u{a750}', '\u{a752}'..='\u{a752}', - '\u{a754}'..='\u{a754}', '\u{a756}'..='\u{a756}', '\u{a758}'..='\u{a758}', - '\u{a75a}'..='\u{a75a}', '\u{a75c}'..='\u{a75c}', '\u{a75e}'..='\u{a75e}', - '\u{a760}'..='\u{a760}', '\u{a762}'..='\u{a762}', '\u{a764}'..='\u{a764}', - '\u{a766}'..='\u{a766}', '\u{a768}'..='\u{a768}', '\u{a76a}'..='\u{a76a}', - '\u{a76c}'..='\u{a76c}', '\u{a76e}'..='\u{a76e}', '\u{a779}'..='\u{a779}', - '\u{a77b}'..='\u{a77b}', '\u{a77d}'..='\u{a77e}', '\u{a780}'..='\u{a780}', - '\u{a782}'..='\u{a782}', '\u{a784}'..='\u{a784}', '\u{a786}'..='\u{a786}', - '\u{a78b}'..='\u{a78b}', '\u{a78d}'..='\u{a78d}', '\u{a790}'..='\u{a790}', - '\u{a792}'..='\u{a792}', '\u{a796}'..='\u{a796}', '\u{a798}'..='\u{a798}', - '\u{a79a}'..='\u{a79a}', '\u{a79c}'..='\u{a79c}', '\u{a79e}'..='\u{a79e}', - '\u{a7a0}'..='\u{a7a0}', '\u{a7a2}'..='\u{a7a2}', '\u{a7a4}'..='\u{a7a4}', - '\u{a7a6}'..='\u{a7a6}', '\u{a7a8}'..='\u{a7a8}', '\u{a7aa}'..='\u{a7ae}', - 
'\u{a7b0}'..='\u{a7b4}', '\u{a7b6}'..='\u{a7b6}', '\u{a7b8}'..='\u{a7b8}', - '\u{a7ba}'..='\u{a7ba}', '\u{a7bc}'..='\u{a7bc}', '\u{a7be}'..='\u{a7be}', - '\u{a7c0}'..='\u{a7c0}', '\u{a7c2}'..='\u{a7c2}', '\u{a7c4}'..='\u{a7c7}', - '\u{a7c9}'..='\u{a7c9}', '\u{a7cb}'..='\u{a7cc}', '\u{a7ce}'..='\u{a7ce}', - '\u{a7d0}'..='\u{a7d0}', '\u{a7d2}'..='\u{a7d2}', '\u{a7d4}'..='\u{a7d4}', - '\u{a7d6}'..='\u{a7d6}', '\u{a7d8}'..='\u{a7d8}', '\u{a7da}'..='\u{a7da}', - '\u{a7dc}'..='\u{a7dc}', '\u{a7f5}'..='\u{a7f5}', '\u{ff21}'..='\u{ff3a}', - '\u{10400}'..='\u{10427}', '\u{104b0}'..='\u{104d3}', '\u{10570}'..='\u{1057a}', - '\u{1057c}'..='\u{1058a}', '\u{1058c}'..='\u{10592}', '\u{10594}'..='\u{10595}', - '\u{10c80}'..='\u{10cb2}', '\u{10d50}'..='\u{10d65}', '\u{118a0}'..='\u{118bf}', - '\u{16e40}'..='\u{16e5f}', '\u{16ea0}'..='\u{16eb8}', '\u{1d400}'..='\u{1d419}', - '\u{1d434}'..='\u{1d44d}', '\u{1d468}'..='\u{1d481}', '\u{1d49c}'..='\u{1d49c}', - '\u{1d49e}'..='\u{1d49f}', '\u{1d4a2}'..='\u{1d4a2}', '\u{1d4a5}'..='\u{1d4a6}', - '\u{1d4a9}'..='\u{1d4ac}', '\u{1d4ae}'..='\u{1d4b5}', '\u{1d4d0}'..='\u{1d4e9}', - '\u{1d504}'..='\u{1d505}', '\u{1d507}'..='\u{1d50a}', '\u{1d50d}'..='\u{1d514}', - '\u{1d516}'..='\u{1d51c}', '\u{1d538}'..='\u{1d539}', '\u{1d53b}'..='\u{1d53e}', - '\u{1d540}'..='\u{1d544}', '\u{1d546}'..='\u{1d546}', '\u{1d54a}'..='\u{1d550}', - '\u{1d56c}'..='\u{1d585}', '\u{1d5a0}'..='\u{1d5b9}', '\u{1d5d4}'..='\u{1d5ed}', - '\u{1d608}'..='\u{1d621}', '\u{1d63c}'..='\u{1d655}', '\u{1d670}'..='\u{1d689}', - '\u{1d6a8}'..='\u{1d6c0}', '\u{1d6e2}'..='\u{1d6fa}', '\u{1d71c}'..='\u{1d734}', - '\u{1d756}'..='\u{1d76e}', '\u{1d790}'..='\u{1d7a8}', '\u{1d7ca}'..='\u{1d7ca}', - '\u{1e900}'..='\u{1e921}', '\u{1f130}'..='\u{1f149}', '\u{1f150}'..='\u{1f169}', - '\u{1f170}'..='\u{1f189}', -]; - -#[rustfmt::skip] -pub(super) static WHITE_SPACE: &[RangeInclusive<char>; 8] = &[ - '\u{85}'..='\u{85}', '\u{a0}'..='\u{a0}', '\u{1680}'..='\u{1680}', '\u{2000}'..='\u{200a}', - '\u{2028}'..='\u{2029}', '\u{202f}'..='\u{202f}', '\u{205f}'..='\u{205f}', - '\u{3000}'..='\u{3000}', -]; - -#[rustfmt::skip] -pub(super) static TO_LOWER: &[(char, [char; 3]); 1488] = &[ - ('\u{41}', ['\u{61}', '\u{0}', '\u{0}']), ('\u{42}', ['\u{62}', '\u{0}', '\u{0}']), - ('\u{43}', ['\u{63}', '\u{0}', '\u{0}']), ('\u{44}', ['\u{64}', '\u{0}', '\u{0}']), - ('\u{45}', ['\u{65}', '\u{0}', '\u{0}']), ('\u{46}', ['\u{66}', '\u{0}', '\u{0}']), - ('\u{47}', ['\u{67}', '\u{0}', '\u{0}']), ('\u{48}', ['\u{68}', '\u{0}', '\u{0}']), - ('\u{49}', ['\u{69}', '\u{0}', '\u{0}']), ('\u{4a}', ['\u{6a}', '\u{0}', '\u{0}']), - ('\u{4b}', ['\u{6b}', '\u{0}', '\u{0}']), ('\u{4c}', ['\u{6c}', '\u{0}', '\u{0}']), - ('\u{4d}', ['\u{6d}', '\u{0}', '\u{0}']), ('\u{4e}', ['\u{6e}', '\u{0}', '\u{0}']), - ('\u{4f}', ['\u{6f}', '\u{0}', '\u{0}']), ('\u{50}', ['\u{70}', '\u{0}', '\u{0}']), - ('\u{51}', ['\u{71}', '\u{0}', '\u{0}']), ('\u{52}', ['\u{72}', '\u{0}', '\u{0}']), - ('\u{53}', ['\u{73}', '\u{0}', '\u{0}']), ('\u{54}', ['\u{74}', '\u{0}', '\u{0}']), - ('\u{55}', ['\u{75}', '\u{0}', '\u{0}']), ('\u{56}', ['\u{76}', '\u{0}', '\u{0}']), - ('\u{57}', ['\u{77}', '\u{0}', '\u{0}']), ('\u{58}', ['\u{78}', '\u{0}', '\u{0}']), - ('\u{59}', ['\u{79}', '\u{0}', '\u{0}']), ('\u{5a}', ['\u{7a}', '\u{0}', '\u{0}']), - ('\u{c0}', ['\u{e0}', '\u{0}', '\u{0}']), ('\u{c1}', ['\u{e1}', '\u{0}', '\u{0}']), - ('\u{c2}', ['\u{e2}', '\u{0}', '\u{0}']), ('\u{c3}', ['\u{e3}', '\u{0}', '\u{0}']), - ('\u{c4}', ['\u{e4}', '\u{0}', '\u{0}']), ('\u{c5}', ['\u{e5}', '\u{0}', '\u{0}']), -
('\u{c6}', ['\u{e6}', '\u{0}', '\u{0}']), ('\u{c7}', ['\u{e7}', '\u{0}', '\u{0}']), - ('\u{c8}', ['\u{e8}', '\u{0}', '\u{0}']), ('\u{c9}', ['\u{e9}', '\u{0}', '\u{0}']), - ('\u{ca}', ['\u{ea}', '\u{0}', '\u{0}']), ('\u{cb}', ['\u{eb}', '\u{0}', '\u{0}']), - ('\u{cc}', ['\u{ec}', '\u{0}', '\u{0}']), ('\u{cd}', ['\u{ed}', '\u{0}', '\u{0}']), - ('\u{ce}', ['\u{ee}', '\u{0}', '\u{0}']), ('\u{cf}', ['\u{ef}', '\u{0}', '\u{0}']), - ('\u{d0}', ['\u{f0}', '\u{0}', '\u{0}']), ('\u{d1}', ['\u{f1}', '\u{0}', '\u{0}']), - ('\u{d2}', ['\u{f2}', '\u{0}', '\u{0}']), ('\u{d3}', ['\u{f3}', '\u{0}', '\u{0}']), - ('\u{d4}', ['\u{f4}', '\u{0}', '\u{0}']), ('\u{d5}', ['\u{f5}', '\u{0}', '\u{0}']), - ('\u{d6}', ['\u{f6}', '\u{0}', '\u{0}']), ('\u{d8}', ['\u{f8}', '\u{0}', '\u{0}']), - ('\u{d9}', ['\u{f9}', '\u{0}', '\u{0}']), ('\u{da}', ['\u{fa}', '\u{0}', '\u{0}']), - ('\u{db}', ['\u{fb}', '\u{0}', '\u{0}']), ('\u{dc}', ['\u{fc}', '\u{0}', '\u{0}']), - ('\u{dd}', ['\u{fd}', '\u{0}', '\u{0}']), ('\u{de}', ['\u{fe}', '\u{0}', '\u{0}']), - ('\u{100}', ['\u{101}', '\u{0}', '\u{0}']), ('\u{102}', ['\u{103}', '\u{0}', '\u{0}']), - ('\u{104}', ['\u{105}', '\u{0}', '\u{0}']), ('\u{106}', ['\u{107}', '\u{0}', '\u{0}']), - ('\u{108}', ['\u{109}', '\u{0}', '\u{0}']), ('\u{10a}', ['\u{10b}', '\u{0}', '\u{0}']), - ('\u{10c}', ['\u{10d}', '\u{0}', '\u{0}']), ('\u{10e}', ['\u{10f}', '\u{0}', '\u{0}']), - ('\u{110}', ['\u{111}', '\u{0}', '\u{0}']), ('\u{112}', ['\u{113}', '\u{0}', '\u{0}']), - ('\u{114}', ['\u{115}', '\u{0}', '\u{0}']), ('\u{116}', ['\u{117}', '\u{0}', '\u{0}']), - ('\u{118}', ['\u{119}', '\u{0}', '\u{0}']), ('\u{11a}', ['\u{11b}', '\u{0}', '\u{0}']), - ('\u{11c}', ['\u{11d}', '\u{0}', '\u{0}']), ('\u{11e}', ['\u{11f}', '\u{0}', '\u{0}']), - ('\u{120}', ['\u{121}', '\u{0}', '\u{0}']), ('\u{122}', ['\u{123}', '\u{0}', '\u{0}']), - ('\u{124}', ['\u{125}', '\u{0}', '\u{0}']), ('\u{126}', ['\u{127}', '\u{0}', '\u{0}']), - ('\u{128}', ['\u{129}', '\u{0}', '\u{0}']), ('\u{12a}', ['\u{12b}', '\u{0}', '\u{0}']), - ('\u{12c}', ['\u{12d}', '\u{0}', '\u{0}']), ('\u{12e}', ['\u{12f}', '\u{0}', '\u{0}']), - ('\u{130}', ['\u{69}', '\u{307}', '\u{0}']), ('\u{132}', ['\u{133}', '\u{0}', '\u{0}']), - ('\u{134}', ['\u{135}', '\u{0}', '\u{0}']), ('\u{136}', ['\u{137}', '\u{0}', '\u{0}']), - ('\u{139}', ['\u{13a}', '\u{0}', '\u{0}']), ('\u{13b}', ['\u{13c}', '\u{0}', '\u{0}']), - ('\u{13d}', ['\u{13e}', '\u{0}', '\u{0}']), ('\u{13f}', ['\u{140}', '\u{0}', '\u{0}']), - ('\u{141}', ['\u{142}', '\u{0}', '\u{0}']), ('\u{143}', ['\u{144}', '\u{0}', '\u{0}']), - ('\u{145}', ['\u{146}', '\u{0}', '\u{0}']), ('\u{147}', ['\u{148}', '\u{0}', '\u{0}']), - ('\u{14a}', ['\u{14b}', '\u{0}', '\u{0}']), ('\u{14c}', ['\u{14d}', '\u{0}', '\u{0}']), - ('\u{14e}', ['\u{14f}', '\u{0}', '\u{0}']), ('\u{150}', ['\u{151}', '\u{0}', '\u{0}']), - ('\u{152}', ['\u{153}', '\u{0}', '\u{0}']), ('\u{154}', ['\u{155}', '\u{0}', '\u{0}']), - ('\u{156}', ['\u{157}', '\u{0}', '\u{0}']), ('\u{158}', ['\u{159}', '\u{0}', '\u{0}']), - ('\u{15a}', ['\u{15b}', '\u{0}', '\u{0}']), ('\u{15c}', ['\u{15d}', '\u{0}', '\u{0}']), - ('\u{15e}', ['\u{15f}', '\u{0}', '\u{0}']), ('\u{160}', ['\u{161}', '\u{0}', '\u{0}']), - ('\u{162}', ['\u{163}', '\u{0}', '\u{0}']), ('\u{164}', ['\u{165}', '\u{0}', '\u{0}']), - ('\u{166}', ['\u{167}', '\u{0}', '\u{0}']), ('\u{168}', ['\u{169}', '\u{0}', '\u{0}']), - ('\u{16a}', ['\u{16b}', '\u{0}', '\u{0}']), ('\u{16c}', ['\u{16d}', '\u{0}', '\u{0}']), - ('\u{16e}', ['\u{16f}', '\u{0}', '\u{0}']), ('\u{170}', ['\u{171}', '\u{0}', '\u{0}']), - 
('\u{172}', ['\u{173}', '\u{0}', '\u{0}']), ('\u{174}', ['\u{175}', '\u{0}', '\u{0}']), - ('\u{176}', ['\u{177}', '\u{0}', '\u{0}']), ('\u{178}', ['\u{ff}', '\u{0}', '\u{0}']), - ('\u{179}', ['\u{17a}', '\u{0}', '\u{0}']), ('\u{17b}', ['\u{17c}', '\u{0}', '\u{0}']), - ('\u{17d}', ['\u{17e}', '\u{0}', '\u{0}']), ('\u{181}', ['\u{253}', '\u{0}', '\u{0}']), - ('\u{182}', ['\u{183}', '\u{0}', '\u{0}']), ('\u{184}', ['\u{185}', '\u{0}', '\u{0}']), - ('\u{186}', ['\u{254}', '\u{0}', '\u{0}']), ('\u{187}', ['\u{188}', '\u{0}', '\u{0}']), - ('\u{189}', ['\u{256}', '\u{0}', '\u{0}']), ('\u{18a}', ['\u{257}', '\u{0}', '\u{0}']), - ('\u{18b}', ['\u{18c}', '\u{0}', '\u{0}']), ('\u{18e}', ['\u{1dd}', '\u{0}', '\u{0}']), - ('\u{18f}', ['\u{259}', '\u{0}', '\u{0}']), ('\u{190}', ['\u{25b}', '\u{0}', '\u{0}']), - ('\u{191}', ['\u{192}', '\u{0}', '\u{0}']), ('\u{193}', ['\u{260}', '\u{0}', '\u{0}']), - ('\u{194}', ['\u{263}', '\u{0}', '\u{0}']), ('\u{196}', ['\u{269}', '\u{0}', '\u{0}']), - ('\u{197}', ['\u{268}', '\u{0}', '\u{0}']), ('\u{198}', ['\u{199}', '\u{0}', '\u{0}']), - ('\u{19c}', ['\u{26f}', '\u{0}', '\u{0}']), ('\u{19d}', ['\u{272}', '\u{0}', '\u{0}']), - ('\u{19f}', ['\u{275}', '\u{0}', '\u{0}']), ('\u{1a0}', ['\u{1a1}', '\u{0}', '\u{0}']), - ('\u{1a2}', ['\u{1a3}', '\u{0}', '\u{0}']), ('\u{1a4}', ['\u{1a5}', '\u{0}', '\u{0}']), - ('\u{1a6}', ['\u{280}', '\u{0}', '\u{0}']), ('\u{1a7}', ['\u{1a8}', '\u{0}', '\u{0}']), - ('\u{1a9}', ['\u{283}', '\u{0}', '\u{0}']), ('\u{1ac}', ['\u{1ad}', '\u{0}', '\u{0}']), - ('\u{1ae}', ['\u{288}', '\u{0}', '\u{0}']), ('\u{1af}', ['\u{1b0}', '\u{0}', '\u{0}']), - ('\u{1b1}', ['\u{28a}', '\u{0}', '\u{0}']), ('\u{1b2}', ['\u{28b}', '\u{0}', '\u{0}']), - ('\u{1b3}', ['\u{1b4}', '\u{0}', '\u{0}']), ('\u{1b5}', ['\u{1b6}', '\u{0}', '\u{0}']), - ('\u{1b7}', ['\u{292}', '\u{0}', '\u{0}']), ('\u{1b8}', ['\u{1b9}', '\u{0}', '\u{0}']), - ('\u{1bc}', ['\u{1bd}', '\u{0}', '\u{0}']), ('\u{1c4}', ['\u{1c6}', '\u{0}', '\u{0}']), - ('\u{1c5}', ['\u{1c6}', '\u{0}', '\u{0}']), ('\u{1c7}', ['\u{1c9}', '\u{0}', '\u{0}']), - ('\u{1c8}', ['\u{1c9}', '\u{0}', '\u{0}']), ('\u{1ca}', ['\u{1cc}', '\u{0}', '\u{0}']), - ('\u{1cb}', ['\u{1cc}', '\u{0}', '\u{0}']), ('\u{1cd}', ['\u{1ce}', '\u{0}', '\u{0}']), - ('\u{1cf}', ['\u{1d0}', '\u{0}', '\u{0}']), ('\u{1d1}', ['\u{1d2}', '\u{0}', '\u{0}']), - ('\u{1d3}', ['\u{1d4}', '\u{0}', '\u{0}']), ('\u{1d5}', ['\u{1d6}', '\u{0}', '\u{0}']), - ('\u{1d7}', ['\u{1d8}', '\u{0}', '\u{0}']), ('\u{1d9}', ['\u{1da}', '\u{0}', '\u{0}']), - ('\u{1db}', ['\u{1dc}', '\u{0}', '\u{0}']), ('\u{1de}', ['\u{1df}', '\u{0}', '\u{0}']), - ('\u{1e0}', ['\u{1e1}', '\u{0}', '\u{0}']), ('\u{1e2}', ['\u{1e3}', '\u{0}', '\u{0}']), - ('\u{1e4}', ['\u{1e5}', '\u{0}', '\u{0}']), ('\u{1e6}', ['\u{1e7}', '\u{0}', '\u{0}']), - ('\u{1e8}', ['\u{1e9}', '\u{0}', '\u{0}']), ('\u{1ea}', ['\u{1eb}', '\u{0}', '\u{0}']), - ('\u{1ec}', ['\u{1ed}', '\u{0}', '\u{0}']), ('\u{1ee}', ['\u{1ef}', '\u{0}', '\u{0}']), - ('\u{1f1}', ['\u{1f3}', '\u{0}', '\u{0}']), ('\u{1f2}', ['\u{1f3}', '\u{0}', '\u{0}']), - ('\u{1f4}', ['\u{1f5}', '\u{0}', '\u{0}']), ('\u{1f6}', ['\u{195}', '\u{0}', '\u{0}']), - ('\u{1f7}', ['\u{1bf}', '\u{0}', '\u{0}']), ('\u{1f8}', ['\u{1f9}', '\u{0}', '\u{0}']), - ('\u{1fa}', ['\u{1fb}', '\u{0}', '\u{0}']), ('\u{1fc}', ['\u{1fd}', '\u{0}', '\u{0}']), - ('\u{1fe}', ['\u{1ff}', '\u{0}', '\u{0}']), ('\u{200}', ['\u{201}', '\u{0}', '\u{0}']), - ('\u{202}', ['\u{203}', '\u{0}', '\u{0}']), ('\u{204}', ['\u{205}', '\u{0}', '\u{0}']), - ('\u{206}', ['\u{207}', '\u{0}', '\u{0}']), 
('\u{208}', ['\u{209}', '\u{0}', '\u{0}']), - ('\u{20a}', ['\u{20b}', '\u{0}', '\u{0}']), ('\u{20c}', ['\u{20d}', '\u{0}', '\u{0}']), - ('\u{20e}', ['\u{20f}', '\u{0}', '\u{0}']), ('\u{210}', ['\u{211}', '\u{0}', '\u{0}']), - ('\u{212}', ['\u{213}', '\u{0}', '\u{0}']), ('\u{214}', ['\u{215}', '\u{0}', '\u{0}']), - ('\u{216}', ['\u{217}', '\u{0}', '\u{0}']), ('\u{218}', ['\u{219}', '\u{0}', '\u{0}']), - ('\u{21a}', ['\u{21b}', '\u{0}', '\u{0}']), ('\u{21c}', ['\u{21d}', '\u{0}', '\u{0}']), - ('\u{21e}', ['\u{21f}', '\u{0}', '\u{0}']), ('\u{220}', ['\u{19e}', '\u{0}', '\u{0}']), - ('\u{222}', ['\u{223}', '\u{0}', '\u{0}']), ('\u{224}', ['\u{225}', '\u{0}', '\u{0}']), - ('\u{226}', ['\u{227}', '\u{0}', '\u{0}']), ('\u{228}', ['\u{229}', '\u{0}', '\u{0}']), - ('\u{22a}', ['\u{22b}', '\u{0}', '\u{0}']), ('\u{22c}', ['\u{22d}', '\u{0}', '\u{0}']), - ('\u{22e}', ['\u{22f}', '\u{0}', '\u{0}']), ('\u{230}', ['\u{231}', '\u{0}', '\u{0}']), - ('\u{232}', ['\u{233}', '\u{0}', '\u{0}']), ('\u{23a}', ['\u{2c65}', '\u{0}', '\u{0}']), - ('\u{23b}', ['\u{23c}', '\u{0}', '\u{0}']), ('\u{23d}', ['\u{19a}', '\u{0}', '\u{0}']), - ('\u{23e}', ['\u{2c66}', '\u{0}', '\u{0}']), ('\u{241}', ['\u{242}', '\u{0}', '\u{0}']), - ('\u{243}', ['\u{180}', '\u{0}', '\u{0}']), ('\u{244}', ['\u{289}', '\u{0}', '\u{0}']), - ('\u{245}', ['\u{28c}', '\u{0}', '\u{0}']), ('\u{246}', ['\u{247}', '\u{0}', '\u{0}']), - ('\u{248}', ['\u{249}', '\u{0}', '\u{0}']), ('\u{24a}', ['\u{24b}', '\u{0}', '\u{0}']), - ('\u{24c}', ['\u{24d}', '\u{0}', '\u{0}']), ('\u{24e}', ['\u{24f}', '\u{0}', '\u{0}']), - ('\u{370}', ['\u{371}', '\u{0}', '\u{0}']), ('\u{372}', ['\u{373}', '\u{0}', '\u{0}']), - ('\u{376}', ['\u{377}', '\u{0}', '\u{0}']), ('\u{37f}', ['\u{3f3}', '\u{0}', '\u{0}']), - ('\u{386}', ['\u{3ac}', '\u{0}', '\u{0}']), ('\u{388}', ['\u{3ad}', '\u{0}', '\u{0}']), - ('\u{389}', ['\u{3ae}', '\u{0}', '\u{0}']), ('\u{38a}', ['\u{3af}', '\u{0}', '\u{0}']), - ('\u{38c}', ['\u{3cc}', '\u{0}', '\u{0}']), ('\u{38e}', ['\u{3cd}', '\u{0}', '\u{0}']), - ('\u{38f}', ['\u{3ce}', '\u{0}', '\u{0}']), ('\u{391}', ['\u{3b1}', '\u{0}', '\u{0}']), - ('\u{392}', ['\u{3b2}', '\u{0}', '\u{0}']), ('\u{393}', ['\u{3b3}', '\u{0}', '\u{0}']), - ('\u{394}', ['\u{3b4}', '\u{0}', '\u{0}']), ('\u{395}', ['\u{3b5}', '\u{0}', '\u{0}']), - ('\u{396}', ['\u{3b6}', '\u{0}', '\u{0}']), ('\u{397}', ['\u{3b7}', '\u{0}', '\u{0}']), - ('\u{398}', ['\u{3b8}', '\u{0}', '\u{0}']), ('\u{399}', ['\u{3b9}', '\u{0}', '\u{0}']), - ('\u{39a}', ['\u{3ba}', '\u{0}', '\u{0}']), ('\u{39b}', ['\u{3bb}', '\u{0}', '\u{0}']), - ('\u{39c}', ['\u{3bc}', '\u{0}', '\u{0}']), ('\u{39d}', ['\u{3bd}', '\u{0}', '\u{0}']), - ('\u{39e}', ['\u{3be}', '\u{0}', '\u{0}']), ('\u{39f}', ['\u{3bf}', '\u{0}', '\u{0}']), - ('\u{3a0}', ['\u{3c0}', '\u{0}', '\u{0}']), ('\u{3a1}', ['\u{3c1}', '\u{0}', '\u{0}']), - ('\u{3a3}', ['\u{3c3}', '\u{0}', '\u{0}']), ('\u{3a4}', ['\u{3c4}', '\u{0}', '\u{0}']), - ('\u{3a5}', ['\u{3c5}', '\u{0}', '\u{0}']), ('\u{3a6}', ['\u{3c6}', '\u{0}', '\u{0}']), - ('\u{3a7}', ['\u{3c7}', '\u{0}', '\u{0}']), ('\u{3a8}', ['\u{3c8}', '\u{0}', '\u{0}']), - ('\u{3a9}', ['\u{3c9}', '\u{0}', '\u{0}']), ('\u{3aa}', ['\u{3ca}', '\u{0}', '\u{0}']), - ('\u{3ab}', ['\u{3cb}', '\u{0}', '\u{0}']), ('\u{3cf}', ['\u{3d7}', '\u{0}', '\u{0}']), - ('\u{3d8}', ['\u{3d9}', '\u{0}', '\u{0}']), ('\u{3da}', ['\u{3db}', '\u{0}', '\u{0}']), - ('\u{3dc}', ['\u{3dd}', '\u{0}', '\u{0}']), ('\u{3de}', ['\u{3df}', '\u{0}', '\u{0}']), - ('\u{3e0}', ['\u{3e1}', '\u{0}', '\u{0}']), ('\u{3e2}', ['\u{3e3}', '\u{0}', 
'\u{0}']), - ('\u{3e4}', ['\u{3e5}', '\u{0}', '\u{0}']), ('\u{3e6}', ['\u{3e7}', '\u{0}', '\u{0}']), - ('\u{3e8}', ['\u{3e9}', '\u{0}', '\u{0}']), ('\u{3ea}', ['\u{3eb}', '\u{0}', '\u{0}']), - ('\u{3ec}', ['\u{3ed}', '\u{0}', '\u{0}']), ('\u{3ee}', ['\u{3ef}', '\u{0}', '\u{0}']), - ('\u{3f4}', ['\u{3b8}', '\u{0}', '\u{0}']), ('\u{3f7}', ['\u{3f8}', '\u{0}', '\u{0}']), - ('\u{3f9}', ['\u{3f2}', '\u{0}', '\u{0}']), ('\u{3fa}', ['\u{3fb}', '\u{0}', '\u{0}']), - ('\u{3fd}', ['\u{37b}', '\u{0}', '\u{0}']), ('\u{3fe}', ['\u{37c}', '\u{0}', '\u{0}']), - ('\u{3ff}', ['\u{37d}', '\u{0}', '\u{0}']), ('\u{400}', ['\u{450}', '\u{0}', '\u{0}']), - ('\u{401}', ['\u{451}', '\u{0}', '\u{0}']), ('\u{402}', ['\u{452}', '\u{0}', '\u{0}']), - ('\u{403}', ['\u{453}', '\u{0}', '\u{0}']), ('\u{404}', ['\u{454}', '\u{0}', '\u{0}']), - ('\u{405}', ['\u{455}', '\u{0}', '\u{0}']), ('\u{406}', ['\u{456}', '\u{0}', '\u{0}']), - ('\u{407}', ['\u{457}', '\u{0}', '\u{0}']), ('\u{408}', ['\u{458}', '\u{0}', '\u{0}']), - ('\u{409}', ['\u{459}', '\u{0}', '\u{0}']), ('\u{40a}', ['\u{45a}', '\u{0}', '\u{0}']), - ('\u{40b}', ['\u{45b}', '\u{0}', '\u{0}']), ('\u{40c}', ['\u{45c}', '\u{0}', '\u{0}']), - ('\u{40d}', ['\u{45d}', '\u{0}', '\u{0}']), ('\u{40e}', ['\u{45e}', '\u{0}', '\u{0}']), - ('\u{40f}', ['\u{45f}', '\u{0}', '\u{0}']), ('\u{410}', ['\u{430}', '\u{0}', '\u{0}']), - ('\u{411}', ['\u{431}', '\u{0}', '\u{0}']), ('\u{412}', ['\u{432}', '\u{0}', '\u{0}']), - ('\u{413}', ['\u{433}', '\u{0}', '\u{0}']), ('\u{414}', ['\u{434}', '\u{0}', '\u{0}']), - ('\u{415}', ['\u{435}', '\u{0}', '\u{0}']), ('\u{416}', ['\u{436}', '\u{0}', '\u{0}']), - ('\u{417}', ['\u{437}', '\u{0}', '\u{0}']), ('\u{418}', ['\u{438}', '\u{0}', '\u{0}']), - ('\u{419}', ['\u{439}', '\u{0}', '\u{0}']), ('\u{41a}', ['\u{43a}', '\u{0}', '\u{0}']), - ('\u{41b}', ['\u{43b}', '\u{0}', '\u{0}']), ('\u{41c}', ['\u{43c}', '\u{0}', '\u{0}']), - ('\u{41d}', ['\u{43d}', '\u{0}', '\u{0}']), ('\u{41e}', ['\u{43e}', '\u{0}', '\u{0}']), - ('\u{41f}', ['\u{43f}', '\u{0}', '\u{0}']), ('\u{420}', ['\u{440}', '\u{0}', '\u{0}']), - ('\u{421}', ['\u{441}', '\u{0}', '\u{0}']), ('\u{422}', ['\u{442}', '\u{0}', '\u{0}']), - ('\u{423}', ['\u{443}', '\u{0}', '\u{0}']), ('\u{424}', ['\u{444}', '\u{0}', '\u{0}']), - ('\u{425}', ['\u{445}', '\u{0}', '\u{0}']), ('\u{426}', ['\u{446}', '\u{0}', '\u{0}']), - ('\u{427}', ['\u{447}', '\u{0}', '\u{0}']), ('\u{428}', ['\u{448}', '\u{0}', '\u{0}']), - ('\u{429}', ['\u{449}', '\u{0}', '\u{0}']), ('\u{42a}', ['\u{44a}', '\u{0}', '\u{0}']), - ('\u{42b}', ['\u{44b}', '\u{0}', '\u{0}']), ('\u{42c}', ['\u{44c}', '\u{0}', '\u{0}']), - ('\u{42d}', ['\u{44d}', '\u{0}', '\u{0}']), ('\u{42e}', ['\u{44e}', '\u{0}', '\u{0}']), - ('\u{42f}', ['\u{44f}', '\u{0}', '\u{0}']), ('\u{460}', ['\u{461}', '\u{0}', '\u{0}']), - ('\u{462}', ['\u{463}', '\u{0}', '\u{0}']), ('\u{464}', ['\u{465}', '\u{0}', '\u{0}']), - ('\u{466}', ['\u{467}', '\u{0}', '\u{0}']), ('\u{468}', ['\u{469}', '\u{0}', '\u{0}']), - ('\u{46a}', ['\u{46b}', '\u{0}', '\u{0}']), ('\u{46c}', ['\u{46d}', '\u{0}', '\u{0}']), - ('\u{46e}', ['\u{46f}', '\u{0}', '\u{0}']), ('\u{470}', ['\u{471}', '\u{0}', '\u{0}']), - ('\u{472}', ['\u{473}', '\u{0}', '\u{0}']), ('\u{474}', ['\u{475}', '\u{0}', '\u{0}']), - ('\u{476}', ['\u{477}', '\u{0}', '\u{0}']), ('\u{478}', ['\u{479}', '\u{0}', '\u{0}']), - ('\u{47a}', ['\u{47b}', '\u{0}', '\u{0}']), ('\u{47c}', ['\u{47d}', '\u{0}', '\u{0}']), - ('\u{47e}', ['\u{47f}', '\u{0}', '\u{0}']), ('\u{480}', ['\u{481}', '\u{0}', '\u{0}']), - ('\u{48a}', ['\u{48b}', 
'\u{0}', '\u{0}']), ('\u{48c}', ['\u{48d}', '\u{0}', '\u{0}']), - ('\u{48e}', ['\u{48f}', '\u{0}', '\u{0}']), ('\u{490}', ['\u{491}', '\u{0}', '\u{0}']), - ('\u{492}', ['\u{493}', '\u{0}', '\u{0}']), ('\u{494}', ['\u{495}', '\u{0}', '\u{0}']), - ('\u{496}', ['\u{497}', '\u{0}', '\u{0}']), ('\u{498}', ['\u{499}', '\u{0}', '\u{0}']), - ('\u{49a}', ['\u{49b}', '\u{0}', '\u{0}']), ('\u{49c}', ['\u{49d}', '\u{0}', '\u{0}']), - ('\u{49e}', ['\u{49f}', '\u{0}', '\u{0}']), ('\u{4a0}', ['\u{4a1}', '\u{0}', '\u{0}']), - ('\u{4a2}', ['\u{4a3}', '\u{0}', '\u{0}']), ('\u{4a4}', ['\u{4a5}', '\u{0}', '\u{0}']), - ('\u{4a6}', ['\u{4a7}', '\u{0}', '\u{0}']), ('\u{4a8}', ['\u{4a9}', '\u{0}', '\u{0}']), - ('\u{4aa}', ['\u{4ab}', '\u{0}', '\u{0}']), ('\u{4ac}', ['\u{4ad}', '\u{0}', '\u{0}']), - ('\u{4ae}', ['\u{4af}', '\u{0}', '\u{0}']), ('\u{4b0}', ['\u{4b1}', '\u{0}', '\u{0}']), - ('\u{4b2}', ['\u{4b3}', '\u{0}', '\u{0}']), ('\u{4b4}', ['\u{4b5}', '\u{0}', '\u{0}']), - ('\u{4b6}', ['\u{4b7}', '\u{0}', '\u{0}']), ('\u{4b8}', ['\u{4b9}', '\u{0}', '\u{0}']), - ('\u{4ba}', ['\u{4bb}', '\u{0}', '\u{0}']), ('\u{4bc}', ['\u{4bd}', '\u{0}', '\u{0}']), - ('\u{4be}', ['\u{4bf}', '\u{0}', '\u{0}']), ('\u{4c0}', ['\u{4cf}', '\u{0}', '\u{0}']), - ('\u{4c1}', ['\u{4c2}', '\u{0}', '\u{0}']), ('\u{4c3}', ['\u{4c4}', '\u{0}', '\u{0}']), - ('\u{4c5}', ['\u{4c6}', '\u{0}', '\u{0}']), ('\u{4c7}', ['\u{4c8}', '\u{0}', '\u{0}']), - ('\u{4c9}', ['\u{4ca}', '\u{0}', '\u{0}']), ('\u{4cb}', ['\u{4cc}', '\u{0}', '\u{0}']), - ('\u{4cd}', ['\u{4ce}', '\u{0}', '\u{0}']), ('\u{4d0}', ['\u{4d1}', '\u{0}', '\u{0}']), - ('\u{4d2}', ['\u{4d3}', '\u{0}', '\u{0}']), ('\u{4d4}', ['\u{4d5}', '\u{0}', '\u{0}']), - ('\u{4d6}', ['\u{4d7}', '\u{0}', '\u{0}']), ('\u{4d8}', ['\u{4d9}', '\u{0}', '\u{0}']), - ('\u{4da}', ['\u{4db}', '\u{0}', '\u{0}']), ('\u{4dc}', ['\u{4dd}', '\u{0}', '\u{0}']), - ('\u{4de}', ['\u{4df}', '\u{0}', '\u{0}']), ('\u{4e0}', ['\u{4e1}', '\u{0}', '\u{0}']), - ('\u{4e2}', ['\u{4e3}', '\u{0}', '\u{0}']), ('\u{4e4}', ['\u{4e5}', '\u{0}', '\u{0}']), - ('\u{4e6}', ['\u{4e7}', '\u{0}', '\u{0}']), ('\u{4e8}', ['\u{4e9}', '\u{0}', '\u{0}']), - ('\u{4ea}', ['\u{4eb}', '\u{0}', '\u{0}']), ('\u{4ec}', ['\u{4ed}', '\u{0}', '\u{0}']), - ('\u{4ee}', ['\u{4ef}', '\u{0}', '\u{0}']), ('\u{4f0}', ['\u{4f1}', '\u{0}', '\u{0}']), - ('\u{4f2}', ['\u{4f3}', '\u{0}', '\u{0}']), ('\u{4f4}', ['\u{4f5}', '\u{0}', '\u{0}']), - ('\u{4f6}', ['\u{4f7}', '\u{0}', '\u{0}']), ('\u{4f8}', ['\u{4f9}', '\u{0}', '\u{0}']), - ('\u{4fa}', ['\u{4fb}', '\u{0}', '\u{0}']), ('\u{4fc}', ['\u{4fd}', '\u{0}', '\u{0}']), - ('\u{4fe}', ['\u{4ff}', '\u{0}', '\u{0}']), ('\u{500}', ['\u{501}', '\u{0}', '\u{0}']), - ('\u{502}', ['\u{503}', '\u{0}', '\u{0}']), ('\u{504}', ['\u{505}', '\u{0}', '\u{0}']), - ('\u{506}', ['\u{507}', '\u{0}', '\u{0}']), ('\u{508}', ['\u{509}', '\u{0}', '\u{0}']), - ('\u{50a}', ['\u{50b}', '\u{0}', '\u{0}']), ('\u{50c}', ['\u{50d}', '\u{0}', '\u{0}']), - ('\u{50e}', ['\u{50f}', '\u{0}', '\u{0}']), ('\u{510}', ['\u{511}', '\u{0}', '\u{0}']), - ('\u{512}', ['\u{513}', '\u{0}', '\u{0}']), ('\u{514}', ['\u{515}', '\u{0}', '\u{0}']), - ('\u{516}', ['\u{517}', '\u{0}', '\u{0}']), ('\u{518}', ['\u{519}', '\u{0}', '\u{0}']), - ('\u{51a}', ['\u{51b}', '\u{0}', '\u{0}']), ('\u{51c}', ['\u{51d}', '\u{0}', '\u{0}']), - ('\u{51e}', ['\u{51f}', '\u{0}', '\u{0}']), ('\u{520}', ['\u{521}', '\u{0}', '\u{0}']), - ('\u{522}', ['\u{523}', '\u{0}', '\u{0}']), ('\u{524}', ['\u{525}', '\u{0}', '\u{0}']), - ('\u{526}', ['\u{527}', '\u{0}', '\u{0}']), ('\u{528}', ['\u{529}', 
'\u{0}', '\u{0}']), - ('\u{52a}', ['\u{52b}', '\u{0}', '\u{0}']), ('\u{52c}', ['\u{52d}', '\u{0}', '\u{0}']), - ('\u{52e}', ['\u{52f}', '\u{0}', '\u{0}']), ('\u{531}', ['\u{561}', '\u{0}', '\u{0}']), - ('\u{532}', ['\u{562}', '\u{0}', '\u{0}']), ('\u{533}', ['\u{563}', '\u{0}', '\u{0}']), - ('\u{534}', ['\u{564}', '\u{0}', '\u{0}']), ('\u{535}', ['\u{565}', '\u{0}', '\u{0}']), - ('\u{536}', ['\u{566}', '\u{0}', '\u{0}']), ('\u{537}', ['\u{567}', '\u{0}', '\u{0}']), - ('\u{538}', ['\u{568}', '\u{0}', '\u{0}']), ('\u{539}', ['\u{569}', '\u{0}', '\u{0}']), - ('\u{53a}', ['\u{56a}', '\u{0}', '\u{0}']), ('\u{53b}', ['\u{56b}', '\u{0}', '\u{0}']), - ('\u{53c}', ['\u{56c}', '\u{0}', '\u{0}']), ('\u{53d}', ['\u{56d}', '\u{0}', '\u{0}']), - ('\u{53e}', ['\u{56e}', '\u{0}', '\u{0}']), ('\u{53f}', ['\u{56f}', '\u{0}', '\u{0}']), - ('\u{540}', ['\u{570}', '\u{0}', '\u{0}']), ('\u{541}', ['\u{571}', '\u{0}', '\u{0}']), - ('\u{542}', ['\u{572}', '\u{0}', '\u{0}']), ('\u{543}', ['\u{573}', '\u{0}', '\u{0}']), - ('\u{544}', ['\u{574}', '\u{0}', '\u{0}']), ('\u{545}', ['\u{575}', '\u{0}', '\u{0}']), - ('\u{546}', ['\u{576}', '\u{0}', '\u{0}']), ('\u{547}', ['\u{577}', '\u{0}', '\u{0}']), - ('\u{548}', ['\u{578}', '\u{0}', '\u{0}']), ('\u{549}', ['\u{579}', '\u{0}', '\u{0}']), - ('\u{54a}', ['\u{57a}', '\u{0}', '\u{0}']), ('\u{54b}', ['\u{57b}', '\u{0}', '\u{0}']), - ('\u{54c}', ['\u{57c}', '\u{0}', '\u{0}']), ('\u{54d}', ['\u{57d}', '\u{0}', '\u{0}']), - ('\u{54e}', ['\u{57e}', '\u{0}', '\u{0}']), ('\u{54f}', ['\u{57f}', '\u{0}', '\u{0}']), - ('\u{550}', ['\u{580}', '\u{0}', '\u{0}']), ('\u{551}', ['\u{581}', '\u{0}', '\u{0}']), - ('\u{552}', ['\u{582}', '\u{0}', '\u{0}']), ('\u{553}', ['\u{583}', '\u{0}', '\u{0}']), - ('\u{554}', ['\u{584}', '\u{0}', '\u{0}']), ('\u{555}', ['\u{585}', '\u{0}', '\u{0}']), - ('\u{556}', ['\u{586}', '\u{0}', '\u{0}']), ('\u{10a0}', ['\u{2d00}', '\u{0}', '\u{0}']), - ('\u{10a1}', ['\u{2d01}', '\u{0}', '\u{0}']), ('\u{10a2}', ['\u{2d02}', '\u{0}', '\u{0}']), - ('\u{10a3}', ['\u{2d03}', '\u{0}', '\u{0}']), ('\u{10a4}', ['\u{2d04}', '\u{0}', '\u{0}']), - ('\u{10a5}', ['\u{2d05}', '\u{0}', '\u{0}']), ('\u{10a6}', ['\u{2d06}', '\u{0}', '\u{0}']), - ('\u{10a7}', ['\u{2d07}', '\u{0}', '\u{0}']), ('\u{10a8}', ['\u{2d08}', '\u{0}', '\u{0}']), - ('\u{10a9}', ['\u{2d09}', '\u{0}', '\u{0}']), ('\u{10aa}', ['\u{2d0a}', '\u{0}', '\u{0}']), - ('\u{10ab}', ['\u{2d0b}', '\u{0}', '\u{0}']), ('\u{10ac}', ['\u{2d0c}', '\u{0}', '\u{0}']), - ('\u{10ad}', ['\u{2d0d}', '\u{0}', '\u{0}']), ('\u{10ae}', ['\u{2d0e}', '\u{0}', '\u{0}']), - ('\u{10af}', ['\u{2d0f}', '\u{0}', '\u{0}']), ('\u{10b0}', ['\u{2d10}', '\u{0}', '\u{0}']), - ('\u{10b1}', ['\u{2d11}', '\u{0}', '\u{0}']), ('\u{10b2}', ['\u{2d12}', '\u{0}', '\u{0}']), - ('\u{10b3}', ['\u{2d13}', '\u{0}', '\u{0}']), ('\u{10b4}', ['\u{2d14}', '\u{0}', '\u{0}']), - ('\u{10b5}', ['\u{2d15}', '\u{0}', '\u{0}']), ('\u{10b6}', ['\u{2d16}', '\u{0}', '\u{0}']), - ('\u{10b7}', ['\u{2d17}', '\u{0}', '\u{0}']), ('\u{10b8}', ['\u{2d18}', '\u{0}', '\u{0}']), - ('\u{10b9}', ['\u{2d19}', '\u{0}', '\u{0}']), ('\u{10ba}', ['\u{2d1a}', '\u{0}', '\u{0}']), - ('\u{10bb}', ['\u{2d1b}', '\u{0}', '\u{0}']), ('\u{10bc}', ['\u{2d1c}', '\u{0}', '\u{0}']), - ('\u{10bd}', ['\u{2d1d}', '\u{0}', '\u{0}']), ('\u{10be}', ['\u{2d1e}', '\u{0}', '\u{0}']), - ('\u{10bf}', ['\u{2d1f}', '\u{0}', '\u{0}']), ('\u{10c0}', ['\u{2d20}', '\u{0}', '\u{0}']), - ('\u{10c1}', ['\u{2d21}', '\u{0}', '\u{0}']), ('\u{10c2}', ['\u{2d22}', '\u{0}', '\u{0}']), - ('\u{10c3}', ['\u{2d23}', '\u{0}', 
'\u{0}']), ('\u{10c4}', ['\u{2d24}', '\u{0}', '\u{0}']), - ('\u{10c5}', ['\u{2d25}', '\u{0}', '\u{0}']), ('\u{10c7}', ['\u{2d27}', '\u{0}', '\u{0}']), - ('\u{10cd}', ['\u{2d2d}', '\u{0}', '\u{0}']), ('\u{13a0}', ['\u{ab70}', '\u{0}', '\u{0}']), - ('\u{13a1}', ['\u{ab71}', '\u{0}', '\u{0}']), ('\u{13a2}', ['\u{ab72}', '\u{0}', '\u{0}']), - ('\u{13a3}', ['\u{ab73}', '\u{0}', '\u{0}']), ('\u{13a4}', ['\u{ab74}', '\u{0}', '\u{0}']), - ('\u{13a5}', ['\u{ab75}', '\u{0}', '\u{0}']), ('\u{13a6}', ['\u{ab76}', '\u{0}', '\u{0}']), - ('\u{13a7}', ['\u{ab77}', '\u{0}', '\u{0}']), ('\u{13a8}', ['\u{ab78}', '\u{0}', '\u{0}']), - ('\u{13a9}', ['\u{ab79}', '\u{0}', '\u{0}']), ('\u{13aa}', ['\u{ab7a}', '\u{0}', '\u{0}']), - ('\u{13ab}', ['\u{ab7b}', '\u{0}', '\u{0}']), ('\u{13ac}', ['\u{ab7c}', '\u{0}', '\u{0}']), - ('\u{13ad}', ['\u{ab7d}', '\u{0}', '\u{0}']), ('\u{13ae}', ['\u{ab7e}', '\u{0}', '\u{0}']), - ('\u{13af}', ['\u{ab7f}', '\u{0}', '\u{0}']), ('\u{13b0}', ['\u{ab80}', '\u{0}', '\u{0}']), - ('\u{13b1}', ['\u{ab81}', '\u{0}', '\u{0}']), ('\u{13b2}', ['\u{ab82}', '\u{0}', '\u{0}']), - ('\u{13b3}', ['\u{ab83}', '\u{0}', '\u{0}']), ('\u{13b4}', ['\u{ab84}', '\u{0}', '\u{0}']), - ('\u{13b5}', ['\u{ab85}', '\u{0}', '\u{0}']), ('\u{13b6}', ['\u{ab86}', '\u{0}', '\u{0}']), - ('\u{13b7}', ['\u{ab87}', '\u{0}', '\u{0}']), ('\u{13b8}', ['\u{ab88}', '\u{0}', '\u{0}']), - ('\u{13b9}', ['\u{ab89}', '\u{0}', '\u{0}']), ('\u{13ba}', ['\u{ab8a}', '\u{0}', '\u{0}']), - ('\u{13bb}', ['\u{ab8b}', '\u{0}', '\u{0}']), ('\u{13bc}', ['\u{ab8c}', '\u{0}', '\u{0}']), - ('\u{13bd}', ['\u{ab8d}', '\u{0}', '\u{0}']), ('\u{13be}', ['\u{ab8e}', '\u{0}', '\u{0}']), - ('\u{13bf}', ['\u{ab8f}', '\u{0}', '\u{0}']), ('\u{13c0}', ['\u{ab90}', '\u{0}', '\u{0}']), - ('\u{13c1}', ['\u{ab91}', '\u{0}', '\u{0}']), ('\u{13c2}', ['\u{ab92}', '\u{0}', '\u{0}']), - ('\u{13c3}', ['\u{ab93}', '\u{0}', '\u{0}']), ('\u{13c4}', ['\u{ab94}', '\u{0}', '\u{0}']), - ('\u{13c5}', ['\u{ab95}', '\u{0}', '\u{0}']), ('\u{13c6}', ['\u{ab96}', '\u{0}', '\u{0}']), - ('\u{13c7}', ['\u{ab97}', '\u{0}', '\u{0}']), ('\u{13c8}', ['\u{ab98}', '\u{0}', '\u{0}']), - ('\u{13c9}', ['\u{ab99}', '\u{0}', '\u{0}']), ('\u{13ca}', ['\u{ab9a}', '\u{0}', '\u{0}']), - ('\u{13cb}', ['\u{ab9b}', '\u{0}', '\u{0}']), ('\u{13cc}', ['\u{ab9c}', '\u{0}', '\u{0}']), - ('\u{13cd}', ['\u{ab9d}', '\u{0}', '\u{0}']), ('\u{13ce}', ['\u{ab9e}', '\u{0}', '\u{0}']), - ('\u{13cf}', ['\u{ab9f}', '\u{0}', '\u{0}']), ('\u{13d0}', ['\u{aba0}', '\u{0}', '\u{0}']), - ('\u{13d1}', ['\u{aba1}', '\u{0}', '\u{0}']), ('\u{13d2}', ['\u{aba2}', '\u{0}', '\u{0}']), - ('\u{13d3}', ['\u{aba3}', '\u{0}', '\u{0}']), ('\u{13d4}', ['\u{aba4}', '\u{0}', '\u{0}']), - ('\u{13d5}', ['\u{aba5}', '\u{0}', '\u{0}']), ('\u{13d6}', ['\u{aba6}', '\u{0}', '\u{0}']), - ('\u{13d7}', ['\u{aba7}', '\u{0}', '\u{0}']), ('\u{13d8}', ['\u{aba8}', '\u{0}', '\u{0}']), - ('\u{13d9}', ['\u{aba9}', '\u{0}', '\u{0}']), ('\u{13da}', ['\u{abaa}', '\u{0}', '\u{0}']), - ('\u{13db}', ['\u{abab}', '\u{0}', '\u{0}']), ('\u{13dc}', ['\u{abac}', '\u{0}', '\u{0}']), - ('\u{13dd}', ['\u{abad}', '\u{0}', '\u{0}']), ('\u{13de}', ['\u{abae}', '\u{0}', '\u{0}']), - ('\u{13df}', ['\u{abaf}', '\u{0}', '\u{0}']), ('\u{13e0}', ['\u{abb0}', '\u{0}', '\u{0}']), - ('\u{13e1}', ['\u{abb1}', '\u{0}', '\u{0}']), ('\u{13e2}', ['\u{abb2}', '\u{0}', '\u{0}']), - ('\u{13e3}', ['\u{abb3}', '\u{0}', '\u{0}']), ('\u{13e4}', ['\u{abb4}', '\u{0}', '\u{0}']), - ('\u{13e5}', ['\u{abb5}', '\u{0}', '\u{0}']), ('\u{13e6}', ['\u{abb6}', '\u{0}', '\u{0}']), - ('\u{13e7}', 
['\u{abb7}', '\u{0}', '\u{0}']), ('\u{13e8}', ['\u{abb8}', '\u{0}', '\u{0}']), - ('\u{13e9}', ['\u{abb9}', '\u{0}', '\u{0}']), ('\u{13ea}', ['\u{abba}', '\u{0}', '\u{0}']), - ('\u{13eb}', ['\u{abbb}', '\u{0}', '\u{0}']), ('\u{13ec}', ['\u{abbc}', '\u{0}', '\u{0}']), - ('\u{13ed}', ['\u{abbd}', '\u{0}', '\u{0}']), ('\u{13ee}', ['\u{abbe}', '\u{0}', '\u{0}']), - ('\u{13ef}', ['\u{abbf}', '\u{0}', '\u{0}']), ('\u{13f0}', ['\u{13f8}', '\u{0}', '\u{0}']), - ('\u{13f1}', ['\u{13f9}', '\u{0}', '\u{0}']), ('\u{13f2}', ['\u{13fa}', '\u{0}', '\u{0}']), - ('\u{13f3}', ['\u{13fb}', '\u{0}', '\u{0}']), ('\u{13f4}', ['\u{13fc}', '\u{0}', '\u{0}']), - ('\u{13f5}', ['\u{13fd}', '\u{0}', '\u{0}']), ('\u{1c89}', ['\u{1c8a}', '\u{0}', '\u{0}']), - ('\u{1c90}', ['\u{10d0}', '\u{0}', '\u{0}']), ('\u{1c91}', ['\u{10d1}', '\u{0}', '\u{0}']), - ('\u{1c92}', ['\u{10d2}', '\u{0}', '\u{0}']), ('\u{1c93}', ['\u{10d3}', '\u{0}', '\u{0}']), - ('\u{1c94}', ['\u{10d4}', '\u{0}', '\u{0}']), ('\u{1c95}', ['\u{10d5}', '\u{0}', '\u{0}']), - ('\u{1c96}', ['\u{10d6}', '\u{0}', '\u{0}']), ('\u{1c97}', ['\u{10d7}', '\u{0}', '\u{0}']), - ('\u{1c98}', ['\u{10d8}', '\u{0}', '\u{0}']), ('\u{1c99}', ['\u{10d9}', '\u{0}', '\u{0}']), - ('\u{1c9a}', ['\u{10da}', '\u{0}', '\u{0}']), ('\u{1c9b}', ['\u{10db}', '\u{0}', '\u{0}']), - ('\u{1c9c}', ['\u{10dc}', '\u{0}', '\u{0}']), ('\u{1c9d}', ['\u{10dd}', '\u{0}', '\u{0}']), - ('\u{1c9e}', ['\u{10de}', '\u{0}', '\u{0}']), ('\u{1c9f}', ['\u{10df}', '\u{0}', '\u{0}']), - ('\u{1ca0}', ['\u{10e0}', '\u{0}', '\u{0}']), ('\u{1ca1}', ['\u{10e1}', '\u{0}', '\u{0}']), - ('\u{1ca2}', ['\u{10e2}', '\u{0}', '\u{0}']), ('\u{1ca3}', ['\u{10e3}', '\u{0}', '\u{0}']), - ('\u{1ca4}', ['\u{10e4}', '\u{0}', '\u{0}']), ('\u{1ca5}', ['\u{10e5}', '\u{0}', '\u{0}']), - ('\u{1ca6}', ['\u{10e6}', '\u{0}', '\u{0}']), ('\u{1ca7}', ['\u{10e7}', '\u{0}', '\u{0}']), - ('\u{1ca8}', ['\u{10e8}', '\u{0}', '\u{0}']), ('\u{1ca9}', ['\u{10e9}', '\u{0}', '\u{0}']), - ('\u{1caa}', ['\u{10ea}', '\u{0}', '\u{0}']), ('\u{1cab}', ['\u{10eb}', '\u{0}', '\u{0}']), - ('\u{1cac}', ['\u{10ec}', '\u{0}', '\u{0}']), ('\u{1cad}', ['\u{10ed}', '\u{0}', '\u{0}']), - ('\u{1cae}', ['\u{10ee}', '\u{0}', '\u{0}']), ('\u{1caf}', ['\u{10ef}', '\u{0}', '\u{0}']), - ('\u{1cb0}', ['\u{10f0}', '\u{0}', '\u{0}']), ('\u{1cb1}', ['\u{10f1}', '\u{0}', '\u{0}']), - ('\u{1cb2}', ['\u{10f2}', '\u{0}', '\u{0}']), ('\u{1cb3}', ['\u{10f3}', '\u{0}', '\u{0}']), - ('\u{1cb4}', ['\u{10f4}', '\u{0}', '\u{0}']), ('\u{1cb5}', ['\u{10f5}', '\u{0}', '\u{0}']), - ('\u{1cb6}', ['\u{10f6}', '\u{0}', '\u{0}']), ('\u{1cb7}', ['\u{10f7}', '\u{0}', '\u{0}']), - ('\u{1cb8}', ['\u{10f8}', '\u{0}', '\u{0}']), ('\u{1cb9}', ['\u{10f9}', '\u{0}', '\u{0}']), - ('\u{1cba}', ['\u{10fa}', '\u{0}', '\u{0}']), ('\u{1cbd}', ['\u{10fd}', '\u{0}', '\u{0}']), - ('\u{1cbe}', ['\u{10fe}', '\u{0}', '\u{0}']), ('\u{1cbf}', ['\u{10ff}', '\u{0}', '\u{0}']), - ('\u{1e00}', ['\u{1e01}', '\u{0}', '\u{0}']), ('\u{1e02}', ['\u{1e03}', '\u{0}', '\u{0}']), - ('\u{1e04}', ['\u{1e05}', '\u{0}', '\u{0}']), ('\u{1e06}', ['\u{1e07}', '\u{0}', '\u{0}']), - ('\u{1e08}', ['\u{1e09}', '\u{0}', '\u{0}']), ('\u{1e0a}', ['\u{1e0b}', '\u{0}', '\u{0}']), - ('\u{1e0c}', ['\u{1e0d}', '\u{0}', '\u{0}']), ('\u{1e0e}', ['\u{1e0f}', '\u{0}', '\u{0}']), - ('\u{1e10}', ['\u{1e11}', '\u{0}', '\u{0}']), ('\u{1e12}', ['\u{1e13}', '\u{0}', '\u{0}']), - ('\u{1e14}', ['\u{1e15}', '\u{0}', '\u{0}']), ('\u{1e16}', ['\u{1e17}', '\u{0}', '\u{0}']), - ('\u{1e18}', ['\u{1e19}', '\u{0}', '\u{0}']), ('\u{1e1a}', ['\u{1e1b}', '\u{0}', 
'\u{0}']), - ('\u{1e1c}', ['\u{1e1d}', '\u{0}', '\u{0}']), ('\u{1e1e}', ['\u{1e1f}', '\u{0}', '\u{0}']), - ('\u{1e20}', ['\u{1e21}', '\u{0}', '\u{0}']), ('\u{1e22}', ['\u{1e23}', '\u{0}', '\u{0}']), - ('\u{1e24}', ['\u{1e25}', '\u{0}', '\u{0}']), ('\u{1e26}', ['\u{1e27}', '\u{0}', '\u{0}']), - ('\u{1e28}', ['\u{1e29}', '\u{0}', '\u{0}']), ('\u{1e2a}', ['\u{1e2b}', '\u{0}', '\u{0}']), - ('\u{1e2c}', ['\u{1e2d}', '\u{0}', '\u{0}']), ('\u{1e2e}', ['\u{1e2f}', '\u{0}', '\u{0}']), - ('\u{1e30}', ['\u{1e31}', '\u{0}', '\u{0}']), ('\u{1e32}', ['\u{1e33}', '\u{0}', '\u{0}']), - ('\u{1e34}', ['\u{1e35}', '\u{0}', '\u{0}']), ('\u{1e36}', ['\u{1e37}', '\u{0}', '\u{0}']), - ('\u{1e38}', ['\u{1e39}', '\u{0}', '\u{0}']), ('\u{1e3a}', ['\u{1e3b}', '\u{0}', '\u{0}']), - ('\u{1e3c}', ['\u{1e3d}', '\u{0}', '\u{0}']), ('\u{1e3e}', ['\u{1e3f}', '\u{0}', '\u{0}']), - ('\u{1e40}', ['\u{1e41}', '\u{0}', '\u{0}']), ('\u{1e42}', ['\u{1e43}', '\u{0}', '\u{0}']), - ('\u{1e44}', ['\u{1e45}', '\u{0}', '\u{0}']), ('\u{1e46}', ['\u{1e47}', '\u{0}', '\u{0}']), - ('\u{1e48}', ['\u{1e49}', '\u{0}', '\u{0}']), ('\u{1e4a}', ['\u{1e4b}', '\u{0}', '\u{0}']), - ('\u{1e4c}', ['\u{1e4d}', '\u{0}', '\u{0}']), ('\u{1e4e}', ['\u{1e4f}', '\u{0}', '\u{0}']), - ('\u{1e50}', ['\u{1e51}', '\u{0}', '\u{0}']), ('\u{1e52}', ['\u{1e53}', '\u{0}', '\u{0}']), - ('\u{1e54}', ['\u{1e55}', '\u{0}', '\u{0}']), ('\u{1e56}', ['\u{1e57}', '\u{0}', '\u{0}']), - ('\u{1e58}', ['\u{1e59}', '\u{0}', '\u{0}']), ('\u{1e5a}', ['\u{1e5b}', '\u{0}', '\u{0}']), - ('\u{1e5c}', ['\u{1e5d}', '\u{0}', '\u{0}']), ('\u{1e5e}', ['\u{1e5f}', '\u{0}', '\u{0}']), - ('\u{1e60}', ['\u{1e61}', '\u{0}', '\u{0}']), ('\u{1e62}', ['\u{1e63}', '\u{0}', '\u{0}']), - ('\u{1e64}', ['\u{1e65}', '\u{0}', '\u{0}']), ('\u{1e66}', ['\u{1e67}', '\u{0}', '\u{0}']), - ('\u{1e68}', ['\u{1e69}', '\u{0}', '\u{0}']), ('\u{1e6a}', ['\u{1e6b}', '\u{0}', '\u{0}']), - ('\u{1e6c}', ['\u{1e6d}', '\u{0}', '\u{0}']), ('\u{1e6e}', ['\u{1e6f}', '\u{0}', '\u{0}']), - ('\u{1e70}', ['\u{1e71}', '\u{0}', '\u{0}']), ('\u{1e72}', ['\u{1e73}', '\u{0}', '\u{0}']), - ('\u{1e74}', ['\u{1e75}', '\u{0}', '\u{0}']), ('\u{1e76}', ['\u{1e77}', '\u{0}', '\u{0}']), - ('\u{1e78}', ['\u{1e79}', '\u{0}', '\u{0}']), ('\u{1e7a}', ['\u{1e7b}', '\u{0}', '\u{0}']), - ('\u{1e7c}', ['\u{1e7d}', '\u{0}', '\u{0}']), ('\u{1e7e}', ['\u{1e7f}', '\u{0}', '\u{0}']), - ('\u{1e80}', ['\u{1e81}', '\u{0}', '\u{0}']), ('\u{1e82}', ['\u{1e83}', '\u{0}', '\u{0}']), - ('\u{1e84}', ['\u{1e85}', '\u{0}', '\u{0}']), ('\u{1e86}', ['\u{1e87}', '\u{0}', '\u{0}']), - ('\u{1e88}', ['\u{1e89}', '\u{0}', '\u{0}']), ('\u{1e8a}', ['\u{1e8b}', '\u{0}', '\u{0}']), - ('\u{1e8c}', ['\u{1e8d}', '\u{0}', '\u{0}']), ('\u{1e8e}', ['\u{1e8f}', '\u{0}', '\u{0}']), - ('\u{1e90}', ['\u{1e91}', '\u{0}', '\u{0}']), ('\u{1e92}', ['\u{1e93}', '\u{0}', '\u{0}']), - ('\u{1e94}', ['\u{1e95}', '\u{0}', '\u{0}']), ('\u{1e9e}', ['\u{df}', '\u{0}', '\u{0}']), - ('\u{1ea0}', ['\u{1ea1}', '\u{0}', '\u{0}']), ('\u{1ea2}', ['\u{1ea3}', '\u{0}', '\u{0}']), - ('\u{1ea4}', ['\u{1ea5}', '\u{0}', '\u{0}']), ('\u{1ea6}', ['\u{1ea7}', '\u{0}', '\u{0}']), - ('\u{1ea8}', ['\u{1ea9}', '\u{0}', '\u{0}']), ('\u{1eaa}', ['\u{1eab}', '\u{0}', '\u{0}']), - ('\u{1eac}', ['\u{1ead}', '\u{0}', '\u{0}']), ('\u{1eae}', ['\u{1eaf}', '\u{0}', '\u{0}']), - ('\u{1eb0}', ['\u{1eb1}', '\u{0}', '\u{0}']), ('\u{1eb2}', ['\u{1eb3}', '\u{0}', '\u{0}']), - ('\u{1eb4}', ['\u{1eb5}', '\u{0}', '\u{0}']), ('\u{1eb6}', ['\u{1eb7}', '\u{0}', '\u{0}']), - ('\u{1eb8}', ['\u{1eb9}', '\u{0}', '\u{0}']), ('\u{1eba}', 
['\u{1ebb}', '\u{0}', '\u{0}']), - ('\u{1ebc}', ['\u{1ebd}', '\u{0}', '\u{0}']), ('\u{1ebe}', ['\u{1ebf}', '\u{0}', '\u{0}']), - ('\u{1ec0}', ['\u{1ec1}', '\u{0}', '\u{0}']), ('\u{1ec2}', ['\u{1ec3}', '\u{0}', '\u{0}']), - ('\u{1ec4}', ['\u{1ec5}', '\u{0}', '\u{0}']), ('\u{1ec6}', ['\u{1ec7}', '\u{0}', '\u{0}']), - ('\u{1ec8}', ['\u{1ec9}', '\u{0}', '\u{0}']), ('\u{1eca}', ['\u{1ecb}', '\u{0}', '\u{0}']), - ('\u{1ecc}', ['\u{1ecd}', '\u{0}', '\u{0}']), ('\u{1ece}', ['\u{1ecf}', '\u{0}', '\u{0}']), - ('\u{1ed0}', ['\u{1ed1}', '\u{0}', '\u{0}']), ('\u{1ed2}', ['\u{1ed3}', '\u{0}', '\u{0}']), - ('\u{1ed4}', ['\u{1ed5}', '\u{0}', '\u{0}']), ('\u{1ed6}', ['\u{1ed7}', '\u{0}', '\u{0}']), - ('\u{1ed8}', ['\u{1ed9}', '\u{0}', '\u{0}']), ('\u{1eda}', ['\u{1edb}', '\u{0}', '\u{0}']), - ('\u{1edc}', ['\u{1edd}', '\u{0}', '\u{0}']), ('\u{1ede}', ['\u{1edf}', '\u{0}', '\u{0}']), - ('\u{1ee0}', ['\u{1ee1}', '\u{0}', '\u{0}']), ('\u{1ee2}', ['\u{1ee3}', '\u{0}', '\u{0}']), - ('\u{1ee4}', ['\u{1ee5}', '\u{0}', '\u{0}']), ('\u{1ee6}', ['\u{1ee7}', '\u{0}', '\u{0}']), - ('\u{1ee8}', ['\u{1ee9}', '\u{0}', '\u{0}']), ('\u{1eea}', ['\u{1eeb}', '\u{0}', '\u{0}']), - ('\u{1eec}', ['\u{1eed}', '\u{0}', '\u{0}']), ('\u{1eee}', ['\u{1eef}', '\u{0}', '\u{0}']), - ('\u{1ef0}', ['\u{1ef1}', '\u{0}', '\u{0}']), ('\u{1ef2}', ['\u{1ef3}', '\u{0}', '\u{0}']), - ('\u{1ef4}', ['\u{1ef5}', '\u{0}', '\u{0}']), ('\u{1ef6}', ['\u{1ef7}', '\u{0}', '\u{0}']), - ('\u{1ef8}', ['\u{1ef9}', '\u{0}', '\u{0}']), ('\u{1efa}', ['\u{1efb}', '\u{0}', '\u{0}']), - ('\u{1efc}', ['\u{1efd}', '\u{0}', '\u{0}']), ('\u{1efe}', ['\u{1eff}', '\u{0}', '\u{0}']), - ('\u{1f08}', ['\u{1f00}', '\u{0}', '\u{0}']), ('\u{1f09}', ['\u{1f01}', '\u{0}', '\u{0}']), - ('\u{1f0a}', ['\u{1f02}', '\u{0}', '\u{0}']), ('\u{1f0b}', ['\u{1f03}', '\u{0}', '\u{0}']), - ('\u{1f0c}', ['\u{1f04}', '\u{0}', '\u{0}']), ('\u{1f0d}', ['\u{1f05}', '\u{0}', '\u{0}']), - ('\u{1f0e}', ['\u{1f06}', '\u{0}', '\u{0}']), ('\u{1f0f}', ['\u{1f07}', '\u{0}', '\u{0}']), - ('\u{1f18}', ['\u{1f10}', '\u{0}', '\u{0}']), ('\u{1f19}', ['\u{1f11}', '\u{0}', '\u{0}']), - ('\u{1f1a}', ['\u{1f12}', '\u{0}', '\u{0}']), ('\u{1f1b}', ['\u{1f13}', '\u{0}', '\u{0}']), - ('\u{1f1c}', ['\u{1f14}', '\u{0}', '\u{0}']), ('\u{1f1d}', ['\u{1f15}', '\u{0}', '\u{0}']), - ('\u{1f28}', ['\u{1f20}', '\u{0}', '\u{0}']), ('\u{1f29}', ['\u{1f21}', '\u{0}', '\u{0}']), - ('\u{1f2a}', ['\u{1f22}', '\u{0}', '\u{0}']), ('\u{1f2b}', ['\u{1f23}', '\u{0}', '\u{0}']), - ('\u{1f2c}', ['\u{1f24}', '\u{0}', '\u{0}']), ('\u{1f2d}', ['\u{1f25}', '\u{0}', '\u{0}']), - ('\u{1f2e}', ['\u{1f26}', '\u{0}', '\u{0}']), ('\u{1f2f}', ['\u{1f27}', '\u{0}', '\u{0}']), - ('\u{1f38}', ['\u{1f30}', '\u{0}', '\u{0}']), ('\u{1f39}', ['\u{1f31}', '\u{0}', '\u{0}']), - ('\u{1f3a}', ['\u{1f32}', '\u{0}', '\u{0}']), ('\u{1f3b}', ['\u{1f33}', '\u{0}', '\u{0}']), - ('\u{1f3c}', ['\u{1f34}', '\u{0}', '\u{0}']), ('\u{1f3d}', ['\u{1f35}', '\u{0}', '\u{0}']), - ('\u{1f3e}', ['\u{1f36}', '\u{0}', '\u{0}']), ('\u{1f3f}', ['\u{1f37}', '\u{0}', '\u{0}']), - ('\u{1f48}', ['\u{1f40}', '\u{0}', '\u{0}']), ('\u{1f49}', ['\u{1f41}', '\u{0}', '\u{0}']), - ('\u{1f4a}', ['\u{1f42}', '\u{0}', '\u{0}']), ('\u{1f4b}', ['\u{1f43}', '\u{0}', '\u{0}']), - ('\u{1f4c}', ['\u{1f44}', '\u{0}', '\u{0}']), ('\u{1f4d}', ['\u{1f45}', '\u{0}', '\u{0}']), - ('\u{1f59}', ['\u{1f51}', '\u{0}', '\u{0}']), ('\u{1f5b}', ['\u{1f53}', '\u{0}', '\u{0}']), - ('\u{1f5d}', ['\u{1f55}', '\u{0}', '\u{0}']), ('\u{1f5f}', ['\u{1f57}', '\u{0}', '\u{0}']), - ('\u{1f68}', ['\u{1f60}', '\u{0}', 
'\u{0}']), ('\u{1f69}', ['\u{1f61}', '\u{0}', '\u{0}']), - ('\u{1f6a}', ['\u{1f62}', '\u{0}', '\u{0}']), ('\u{1f6b}', ['\u{1f63}', '\u{0}', '\u{0}']), - ('\u{1f6c}', ['\u{1f64}', '\u{0}', '\u{0}']), ('\u{1f6d}', ['\u{1f65}', '\u{0}', '\u{0}']), - ('\u{1f6e}', ['\u{1f66}', '\u{0}', '\u{0}']), ('\u{1f6f}', ['\u{1f67}', '\u{0}', '\u{0}']), - ('\u{1f88}', ['\u{1f80}', '\u{0}', '\u{0}']), ('\u{1f89}', ['\u{1f81}', '\u{0}', '\u{0}']), - ('\u{1f8a}', ['\u{1f82}', '\u{0}', '\u{0}']), ('\u{1f8b}', ['\u{1f83}', '\u{0}', '\u{0}']), - ('\u{1f8c}', ['\u{1f84}', '\u{0}', '\u{0}']), ('\u{1f8d}', ['\u{1f85}', '\u{0}', '\u{0}']), - ('\u{1f8e}', ['\u{1f86}', '\u{0}', '\u{0}']), ('\u{1f8f}', ['\u{1f87}', '\u{0}', '\u{0}']), - ('\u{1f98}', ['\u{1f90}', '\u{0}', '\u{0}']), ('\u{1f99}', ['\u{1f91}', '\u{0}', '\u{0}']), - ('\u{1f9a}', ['\u{1f92}', '\u{0}', '\u{0}']), ('\u{1f9b}', ['\u{1f93}', '\u{0}', '\u{0}']), - ('\u{1f9c}', ['\u{1f94}', '\u{0}', '\u{0}']), ('\u{1f9d}', ['\u{1f95}', '\u{0}', '\u{0}']), - ('\u{1f9e}', ['\u{1f96}', '\u{0}', '\u{0}']), ('\u{1f9f}', ['\u{1f97}', '\u{0}', '\u{0}']), - ('\u{1fa8}', ['\u{1fa0}', '\u{0}', '\u{0}']), ('\u{1fa9}', ['\u{1fa1}', '\u{0}', '\u{0}']), - ('\u{1faa}', ['\u{1fa2}', '\u{0}', '\u{0}']), ('\u{1fab}', ['\u{1fa3}', '\u{0}', '\u{0}']), - ('\u{1fac}', ['\u{1fa4}', '\u{0}', '\u{0}']), ('\u{1fad}', ['\u{1fa5}', '\u{0}', '\u{0}']), - ('\u{1fae}', ['\u{1fa6}', '\u{0}', '\u{0}']), ('\u{1faf}', ['\u{1fa7}', '\u{0}', '\u{0}']), - ('\u{1fb8}', ['\u{1fb0}', '\u{0}', '\u{0}']), ('\u{1fb9}', ['\u{1fb1}', '\u{0}', '\u{0}']), - ('\u{1fba}', ['\u{1f70}', '\u{0}', '\u{0}']), ('\u{1fbb}', ['\u{1f71}', '\u{0}', '\u{0}']), - ('\u{1fbc}', ['\u{1fb3}', '\u{0}', '\u{0}']), ('\u{1fc8}', ['\u{1f72}', '\u{0}', '\u{0}']), - ('\u{1fc9}', ['\u{1f73}', '\u{0}', '\u{0}']), ('\u{1fca}', ['\u{1f74}', '\u{0}', '\u{0}']), - ('\u{1fcb}', ['\u{1f75}', '\u{0}', '\u{0}']), ('\u{1fcc}', ['\u{1fc3}', '\u{0}', '\u{0}']), - ('\u{1fd8}', ['\u{1fd0}', '\u{0}', '\u{0}']), ('\u{1fd9}', ['\u{1fd1}', '\u{0}', '\u{0}']), - ('\u{1fda}', ['\u{1f76}', '\u{0}', '\u{0}']), ('\u{1fdb}', ['\u{1f77}', '\u{0}', '\u{0}']), - ('\u{1fe8}', ['\u{1fe0}', '\u{0}', '\u{0}']), ('\u{1fe9}', ['\u{1fe1}', '\u{0}', '\u{0}']), - ('\u{1fea}', ['\u{1f7a}', '\u{0}', '\u{0}']), ('\u{1feb}', ['\u{1f7b}', '\u{0}', '\u{0}']), - ('\u{1fec}', ['\u{1fe5}', '\u{0}', '\u{0}']), ('\u{1ff8}', ['\u{1f78}', '\u{0}', '\u{0}']), - ('\u{1ff9}', ['\u{1f79}', '\u{0}', '\u{0}']), ('\u{1ffa}', ['\u{1f7c}', '\u{0}', '\u{0}']), - ('\u{1ffb}', ['\u{1f7d}', '\u{0}', '\u{0}']), ('\u{1ffc}', ['\u{1ff3}', '\u{0}', '\u{0}']), - ('\u{2126}', ['\u{3c9}', '\u{0}', '\u{0}']), ('\u{212a}', ['\u{6b}', '\u{0}', '\u{0}']), - ('\u{212b}', ['\u{e5}', '\u{0}', '\u{0}']), ('\u{2132}', ['\u{214e}', '\u{0}', '\u{0}']), - ('\u{2160}', ['\u{2170}', '\u{0}', '\u{0}']), ('\u{2161}', ['\u{2171}', '\u{0}', '\u{0}']), - ('\u{2162}', ['\u{2172}', '\u{0}', '\u{0}']), ('\u{2163}', ['\u{2173}', '\u{0}', '\u{0}']), - ('\u{2164}', ['\u{2174}', '\u{0}', '\u{0}']), ('\u{2165}', ['\u{2175}', '\u{0}', '\u{0}']), - ('\u{2166}', ['\u{2176}', '\u{0}', '\u{0}']), ('\u{2167}', ['\u{2177}', '\u{0}', '\u{0}']), - ('\u{2168}', ['\u{2178}', '\u{0}', '\u{0}']), ('\u{2169}', ['\u{2179}', '\u{0}', '\u{0}']), - ('\u{216a}', ['\u{217a}', '\u{0}', '\u{0}']), ('\u{216b}', ['\u{217b}', '\u{0}', '\u{0}']), - ('\u{216c}', ['\u{217c}', '\u{0}', '\u{0}']), ('\u{216d}', ['\u{217d}', '\u{0}', '\u{0}']), - ('\u{216e}', ['\u{217e}', '\u{0}', '\u{0}']), ('\u{216f}', ['\u{217f}', '\u{0}', '\u{0}']), - ('\u{2183}', 
['\u{2184}', '\u{0}', '\u{0}']), ('\u{24b6}', ['\u{24d0}', '\u{0}', '\u{0}']), - ('\u{24b7}', ['\u{24d1}', '\u{0}', '\u{0}']), ('\u{24b8}', ['\u{24d2}', '\u{0}', '\u{0}']), - ('\u{24b9}', ['\u{24d3}', '\u{0}', '\u{0}']), ('\u{24ba}', ['\u{24d4}', '\u{0}', '\u{0}']), - ('\u{24bb}', ['\u{24d5}', '\u{0}', '\u{0}']), ('\u{24bc}', ['\u{24d6}', '\u{0}', '\u{0}']), - ('\u{24bd}', ['\u{24d7}', '\u{0}', '\u{0}']), ('\u{24be}', ['\u{24d8}', '\u{0}', '\u{0}']), - ('\u{24bf}', ['\u{24d9}', '\u{0}', '\u{0}']), ('\u{24c0}', ['\u{24da}', '\u{0}', '\u{0}']), - ('\u{24c1}', ['\u{24db}', '\u{0}', '\u{0}']), ('\u{24c2}', ['\u{24dc}', '\u{0}', '\u{0}']), - ('\u{24c3}', ['\u{24dd}', '\u{0}', '\u{0}']), ('\u{24c4}', ['\u{24de}', '\u{0}', '\u{0}']), - ('\u{24c5}', ['\u{24df}', '\u{0}', '\u{0}']), ('\u{24c6}', ['\u{24e0}', '\u{0}', '\u{0}']), - ('\u{24c7}', ['\u{24e1}', '\u{0}', '\u{0}']), ('\u{24c8}', ['\u{24e2}', '\u{0}', '\u{0}']), - ('\u{24c9}', ['\u{24e3}', '\u{0}', '\u{0}']), ('\u{24ca}', ['\u{24e4}', '\u{0}', '\u{0}']), - ('\u{24cb}', ['\u{24e5}', '\u{0}', '\u{0}']), ('\u{24cc}', ['\u{24e6}', '\u{0}', '\u{0}']), - ('\u{24cd}', ['\u{24e7}', '\u{0}', '\u{0}']), ('\u{24ce}', ['\u{24e8}', '\u{0}', '\u{0}']), - ('\u{24cf}', ['\u{24e9}', '\u{0}', '\u{0}']), ('\u{2c00}', ['\u{2c30}', '\u{0}', '\u{0}']), - ('\u{2c01}', ['\u{2c31}', '\u{0}', '\u{0}']), ('\u{2c02}', ['\u{2c32}', '\u{0}', '\u{0}']), - ('\u{2c03}', ['\u{2c33}', '\u{0}', '\u{0}']), ('\u{2c04}', ['\u{2c34}', '\u{0}', '\u{0}']), - ('\u{2c05}', ['\u{2c35}', '\u{0}', '\u{0}']), ('\u{2c06}', ['\u{2c36}', '\u{0}', '\u{0}']), - ('\u{2c07}', ['\u{2c37}', '\u{0}', '\u{0}']), ('\u{2c08}', ['\u{2c38}', '\u{0}', '\u{0}']), - ('\u{2c09}', ['\u{2c39}', '\u{0}', '\u{0}']), ('\u{2c0a}', ['\u{2c3a}', '\u{0}', '\u{0}']), - ('\u{2c0b}', ['\u{2c3b}', '\u{0}', '\u{0}']), ('\u{2c0c}', ['\u{2c3c}', '\u{0}', '\u{0}']), - ('\u{2c0d}', ['\u{2c3d}', '\u{0}', '\u{0}']), ('\u{2c0e}', ['\u{2c3e}', '\u{0}', '\u{0}']), - ('\u{2c0f}', ['\u{2c3f}', '\u{0}', '\u{0}']), ('\u{2c10}', ['\u{2c40}', '\u{0}', '\u{0}']), - ('\u{2c11}', ['\u{2c41}', '\u{0}', '\u{0}']), ('\u{2c12}', ['\u{2c42}', '\u{0}', '\u{0}']), - ('\u{2c13}', ['\u{2c43}', '\u{0}', '\u{0}']), ('\u{2c14}', ['\u{2c44}', '\u{0}', '\u{0}']), - ('\u{2c15}', ['\u{2c45}', '\u{0}', '\u{0}']), ('\u{2c16}', ['\u{2c46}', '\u{0}', '\u{0}']), - ('\u{2c17}', ['\u{2c47}', '\u{0}', '\u{0}']), ('\u{2c18}', ['\u{2c48}', '\u{0}', '\u{0}']), - ('\u{2c19}', ['\u{2c49}', '\u{0}', '\u{0}']), ('\u{2c1a}', ['\u{2c4a}', '\u{0}', '\u{0}']), - ('\u{2c1b}', ['\u{2c4b}', '\u{0}', '\u{0}']), ('\u{2c1c}', ['\u{2c4c}', '\u{0}', '\u{0}']), - ('\u{2c1d}', ['\u{2c4d}', '\u{0}', '\u{0}']), ('\u{2c1e}', ['\u{2c4e}', '\u{0}', '\u{0}']), - ('\u{2c1f}', ['\u{2c4f}', '\u{0}', '\u{0}']), ('\u{2c20}', ['\u{2c50}', '\u{0}', '\u{0}']), - ('\u{2c21}', ['\u{2c51}', '\u{0}', '\u{0}']), ('\u{2c22}', ['\u{2c52}', '\u{0}', '\u{0}']), - ('\u{2c23}', ['\u{2c53}', '\u{0}', '\u{0}']), ('\u{2c24}', ['\u{2c54}', '\u{0}', '\u{0}']), - ('\u{2c25}', ['\u{2c55}', '\u{0}', '\u{0}']), ('\u{2c26}', ['\u{2c56}', '\u{0}', '\u{0}']), - ('\u{2c27}', ['\u{2c57}', '\u{0}', '\u{0}']), ('\u{2c28}', ['\u{2c58}', '\u{0}', '\u{0}']), - ('\u{2c29}', ['\u{2c59}', '\u{0}', '\u{0}']), ('\u{2c2a}', ['\u{2c5a}', '\u{0}', '\u{0}']), - ('\u{2c2b}', ['\u{2c5b}', '\u{0}', '\u{0}']), ('\u{2c2c}', ['\u{2c5c}', '\u{0}', '\u{0}']), - ('\u{2c2d}', ['\u{2c5d}', '\u{0}', '\u{0}']), ('\u{2c2e}', ['\u{2c5e}', '\u{0}', '\u{0}']), - ('\u{2c2f}', ['\u{2c5f}', '\u{0}', '\u{0}']), ('\u{2c60}', ['\u{2c61}', '\u{0}', 
'\u{0}']), - ('\u{2c62}', ['\u{26b}', '\u{0}', '\u{0}']), ('\u{2c63}', ['\u{1d7d}', '\u{0}', '\u{0}']), - ('\u{2c64}', ['\u{27d}', '\u{0}', '\u{0}']), ('\u{2c67}', ['\u{2c68}', '\u{0}', '\u{0}']), - ('\u{2c69}', ['\u{2c6a}', '\u{0}', '\u{0}']), ('\u{2c6b}', ['\u{2c6c}', '\u{0}', '\u{0}']), - ('\u{2c6d}', ['\u{251}', '\u{0}', '\u{0}']), ('\u{2c6e}', ['\u{271}', '\u{0}', '\u{0}']), - ('\u{2c6f}', ['\u{250}', '\u{0}', '\u{0}']), ('\u{2c70}', ['\u{252}', '\u{0}', '\u{0}']), - ('\u{2c72}', ['\u{2c73}', '\u{0}', '\u{0}']), ('\u{2c75}', ['\u{2c76}', '\u{0}', '\u{0}']), - ('\u{2c7e}', ['\u{23f}', '\u{0}', '\u{0}']), ('\u{2c7f}', ['\u{240}', '\u{0}', '\u{0}']), - ('\u{2c80}', ['\u{2c81}', '\u{0}', '\u{0}']), ('\u{2c82}', ['\u{2c83}', '\u{0}', '\u{0}']), - ('\u{2c84}', ['\u{2c85}', '\u{0}', '\u{0}']), ('\u{2c86}', ['\u{2c87}', '\u{0}', '\u{0}']), - ('\u{2c88}', ['\u{2c89}', '\u{0}', '\u{0}']), ('\u{2c8a}', ['\u{2c8b}', '\u{0}', '\u{0}']), - ('\u{2c8c}', ['\u{2c8d}', '\u{0}', '\u{0}']), ('\u{2c8e}', ['\u{2c8f}', '\u{0}', '\u{0}']), - ('\u{2c90}', ['\u{2c91}', '\u{0}', '\u{0}']), ('\u{2c92}', ['\u{2c93}', '\u{0}', '\u{0}']), - ('\u{2c94}', ['\u{2c95}', '\u{0}', '\u{0}']), ('\u{2c96}', ['\u{2c97}', '\u{0}', '\u{0}']), - ('\u{2c98}', ['\u{2c99}', '\u{0}', '\u{0}']), ('\u{2c9a}', ['\u{2c9b}', '\u{0}', '\u{0}']), - ('\u{2c9c}', ['\u{2c9d}', '\u{0}', '\u{0}']), ('\u{2c9e}', ['\u{2c9f}', '\u{0}', '\u{0}']), - ('\u{2ca0}', ['\u{2ca1}', '\u{0}', '\u{0}']), ('\u{2ca2}', ['\u{2ca3}', '\u{0}', '\u{0}']), - ('\u{2ca4}', ['\u{2ca5}', '\u{0}', '\u{0}']), ('\u{2ca6}', ['\u{2ca7}', '\u{0}', '\u{0}']), - ('\u{2ca8}', ['\u{2ca9}', '\u{0}', '\u{0}']), ('\u{2caa}', ['\u{2cab}', '\u{0}', '\u{0}']), - ('\u{2cac}', ['\u{2cad}', '\u{0}', '\u{0}']), ('\u{2cae}', ['\u{2caf}', '\u{0}', '\u{0}']), - ('\u{2cb0}', ['\u{2cb1}', '\u{0}', '\u{0}']), ('\u{2cb2}', ['\u{2cb3}', '\u{0}', '\u{0}']), - ('\u{2cb4}', ['\u{2cb5}', '\u{0}', '\u{0}']), ('\u{2cb6}', ['\u{2cb7}', '\u{0}', '\u{0}']), - ('\u{2cb8}', ['\u{2cb9}', '\u{0}', '\u{0}']), ('\u{2cba}', ['\u{2cbb}', '\u{0}', '\u{0}']), - ('\u{2cbc}', ['\u{2cbd}', '\u{0}', '\u{0}']), ('\u{2cbe}', ['\u{2cbf}', '\u{0}', '\u{0}']), - ('\u{2cc0}', ['\u{2cc1}', '\u{0}', '\u{0}']), ('\u{2cc2}', ['\u{2cc3}', '\u{0}', '\u{0}']), - ('\u{2cc4}', ['\u{2cc5}', '\u{0}', '\u{0}']), ('\u{2cc6}', ['\u{2cc7}', '\u{0}', '\u{0}']), - ('\u{2cc8}', ['\u{2cc9}', '\u{0}', '\u{0}']), ('\u{2cca}', ['\u{2ccb}', '\u{0}', '\u{0}']), - ('\u{2ccc}', ['\u{2ccd}', '\u{0}', '\u{0}']), ('\u{2cce}', ['\u{2ccf}', '\u{0}', '\u{0}']), - ('\u{2cd0}', ['\u{2cd1}', '\u{0}', '\u{0}']), ('\u{2cd2}', ['\u{2cd3}', '\u{0}', '\u{0}']), - ('\u{2cd4}', ['\u{2cd5}', '\u{0}', '\u{0}']), ('\u{2cd6}', ['\u{2cd7}', '\u{0}', '\u{0}']), - ('\u{2cd8}', ['\u{2cd9}', '\u{0}', '\u{0}']), ('\u{2cda}', ['\u{2cdb}', '\u{0}', '\u{0}']), - ('\u{2cdc}', ['\u{2cdd}', '\u{0}', '\u{0}']), ('\u{2cde}', ['\u{2cdf}', '\u{0}', '\u{0}']), - ('\u{2ce0}', ['\u{2ce1}', '\u{0}', '\u{0}']), ('\u{2ce2}', ['\u{2ce3}', '\u{0}', '\u{0}']), - ('\u{2ceb}', ['\u{2cec}', '\u{0}', '\u{0}']), ('\u{2ced}', ['\u{2cee}', '\u{0}', '\u{0}']), - ('\u{2cf2}', ['\u{2cf3}', '\u{0}', '\u{0}']), ('\u{a640}', ['\u{a641}', '\u{0}', '\u{0}']), - ('\u{a642}', ['\u{a643}', '\u{0}', '\u{0}']), ('\u{a644}', ['\u{a645}', '\u{0}', '\u{0}']), - ('\u{a646}', ['\u{a647}', '\u{0}', '\u{0}']), ('\u{a648}', ['\u{a649}', '\u{0}', '\u{0}']), - ('\u{a64a}', ['\u{a64b}', '\u{0}', '\u{0}']), ('\u{a64c}', ['\u{a64d}', '\u{0}', '\u{0}']), - ('\u{a64e}', ['\u{a64f}', '\u{0}', '\u{0}']), ('\u{a650}', ['\u{a651}', 
'\u{0}', '\u{0}']), - ('\u{a652}', ['\u{a653}', '\u{0}', '\u{0}']), ('\u{a654}', ['\u{a655}', '\u{0}', '\u{0}']), - ('\u{a656}', ['\u{a657}', '\u{0}', '\u{0}']), ('\u{a658}', ['\u{a659}', '\u{0}', '\u{0}']), - ('\u{a65a}', ['\u{a65b}', '\u{0}', '\u{0}']), ('\u{a65c}', ['\u{a65d}', '\u{0}', '\u{0}']), - ('\u{a65e}', ['\u{a65f}', '\u{0}', '\u{0}']), ('\u{a660}', ['\u{a661}', '\u{0}', '\u{0}']), - ('\u{a662}', ['\u{a663}', '\u{0}', '\u{0}']), ('\u{a664}', ['\u{a665}', '\u{0}', '\u{0}']), - ('\u{a666}', ['\u{a667}', '\u{0}', '\u{0}']), ('\u{a668}', ['\u{a669}', '\u{0}', '\u{0}']), - ('\u{a66a}', ['\u{a66b}', '\u{0}', '\u{0}']), ('\u{a66c}', ['\u{a66d}', '\u{0}', '\u{0}']), - ('\u{a680}', ['\u{a681}', '\u{0}', '\u{0}']), ('\u{a682}', ['\u{a683}', '\u{0}', '\u{0}']), - ('\u{a684}', ['\u{a685}', '\u{0}', '\u{0}']), ('\u{a686}', ['\u{a687}', '\u{0}', '\u{0}']), - ('\u{a688}', ['\u{a689}', '\u{0}', '\u{0}']), ('\u{a68a}', ['\u{a68b}', '\u{0}', '\u{0}']), - ('\u{a68c}', ['\u{a68d}', '\u{0}', '\u{0}']), ('\u{a68e}', ['\u{a68f}', '\u{0}', '\u{0}']), - ('\u{a690}', ['\u{a691}', '\u{0}', '\u{0}']), ('\u{a692}', ['\u{a693}', '\u{0}', '\u{0}']), - ('\u{a694}', ['\u{a695}', '\u{0}', '\u{0}']), ('\u{a696}', ['\u{a697}', '\u{0}', '\u{0}']), - ('\u{a698}', ['\u{a699}', '\u{0}', '\u{0}']), ('\u{a69a}', ['\u{a69b}', '\u{0}', '\u{0}']), - ('\u{a722}', ['\u{a723}', '\u{0}', '\u{0}']), ('\u{a724}', ['\u{a725}', '\u{0}', '\u{0}']), - ('\u{a726}', ['\u{a727}', '\u{0}', '\u{0}']), ('\u{a728}', ['\u{a729}', '\u{0}', '\u{0}']), - ('\u{a72a}', ['\u{a72b}', '\u{0}', '\u{0}']), ('\u{a72c}', ['\u{a72d}', '\u{0}', '\u{0}']), - ('\u{a72e}', ['\u{a72f}', '\u{0}', '\u{0}']), ('\u{a732}', ['\u{a733}', '\u{0}', '\u{0}']), - ('\u{a734}', ['\u{a735}', '\u{0}', '\u{0}']), ('\u{a736}', ['\u{a737}', '\u{0}', '\u{0}']), - ('\u{a738}', ['\u{a739}', '\u{0}', '\u{0}']), ('\u{a73a}', ['\u{a73b}', '\u{0}', '\u{0}']), - ('\u{a73c}', ['\u{a73d}', '\u{0}', '\u{0}']), ('\u{a73e}', ['\u{a73f}', '\u{0}', '\u{0}']), - ('\u{a740}', ['\u{a741}', '\u{0}', '\u{0}']), ('\u{a742}', ['\u{a743}', '\u{0}', '\u{0}']), - ('\u{a744}', ['\u{a745}', '\u{0}', '\u{0}']), ('\u{a746}', ['\u{a747}', '\u{0}', '\u{0}']), - ('\u{a748}', ['\u{a749}', '\u{0}', '\u{0}']), ('\u{a74a}', ['\u{a74b}', '\u{0}', '\u{0}']), - ('\u{a74c}', ['\u{a74d}', '\u{0}', '\u{0}']), ('\u{a74e}', ['\u{a74f}', '\u{0}', '\u{0}']), - ('\u{a750}', ['\u{a751}', '\u{0}', '\u{0}']), ('\u{a752}', ['\u{a753}', '\u{0}', '\u{0}']), - ('\u{a754}', ['\u{a755}', '\u{0}', '\u{0}']), ('\u{a756}', ['\u{a757}', '\u{0}', '\u{0}']), - ('\u{a758}', ['\u{a759}', '\u{0}', '\u{0}']), ('\u{a75a}', ['\u{a75b}', '\u{0}', '\u{0}']), - ('\u{a75c}', ['\u{a75d}', '\u{0}', '\u{0}']), ('\u{a75e}', ['\u{a75f}', '\u{0}', '\u{0}']), - ('\u{a760}', ['\u{a761}', '\u{0}', '\u{0}']), ('\u{a762}', ['\u{a763}', '\u{0}', '\u{0}']), - ('\u{a764}', ['\u{a765}', '\u{0}', '\u{0}']), ('\u{a766}', ['\u{a767}', '\u{0}', '\u{0}']), - ('\u{a768}', ['\u{a769}', '\u{0}', '\u{0}']), ('\u{a76a}', ['\u{a76b}', '\u{0}', '\u{0}']), - ('\u{a76c}', ['\u{a76d}', '\u{0}', '\u{0}']), ('\u{a76e}', ['\u{a76f}', '\u{0}', '\u{0}']), - ('\u{a779}', ['\u{a77a}', '\u{0}', '\u{0}']), ('\u{a77b}', ['\u{a77c}', '\u{0}', '\u{0}']), - ('\u{a77d}', ['\u{1d79}', '\u{0}', '\u{0}']), ('\u{a77e}', ['\u{a77f}', '\u{0}', '\u{0}']), - ('\u{a780}', ['\u{a781}', '\u{0}', '\u{0}']), ('\u{a782}', ['\u{a783}', '\u{0}', '\u{0}']), - ('\u{a784}', ['\u{a785}', '\u{0}', '\u{0}']), ('\u{a786}', ['\u{a787}', '\u{0}', '\u{0}']), - ('\u{a78b}', ['\u{a78c}', '\u{0}', '\u{0}']), 
('\u{a78d}', ['\u{265}', '\u{0}', '\u{0}']), - ('\u{a790}', ['\u{a791}', '\u{0}', '\u{0}']), ('\u{a792}', ['\u{a793}', '\u{0}', '\u{0}']), - ('\u{a796}', ['\u{a797}', '\u{0}', '\u{0}']), ('\u{a798}', ['\u{a799}', '\u{0}', '\u{0}']), - ('\u{a79a}', ['\u{a79b}', '\u{0}', '\u{0}']), ('\u{a79c}', ['\u{a79d}', '\u{0}', '\u{0}']), - ('\u{a79e}', ['\u{a79f}', '\u{0}', '\u{0}']), ('\u{a7a0}', ['\u{a7a1}', '\u{0}', '\u{0}']), - ('\u{a7a2}', ['\u{a7a3}', '\u{0}', '\u{0}']), ('\u{a7a4}', ['\u{a7a5}', '\u{0}', '\u{0}']), - ('\u{a7a6}', ['\u{a7a7}', '\u{0}', '\u{0}']), ('\u{a7a8}', ['\u{a7a9}', '\u{0}', '\u{0}']), - ('\u{a7aa}', ['\u{266}', '\u{0}', '\u{0}']), ('\u{a7ab}', ['\u{25c}', '\u{0}', '\u{0}']), - ('\u{a7ac}', ['\u{261}', '\u{0}', '\u{0}']), ('\u{a7ad}', ['\u{26c}', '\u{0}', '\u{0}']), - ('\u{a7ae}', ['\u{26a}', '\u{0}', '\u{0}']), ('\u{a7b0}', ['\u{29e}', '\u{0}', '\u{0}']), - ('\u{a7b1}', ['\u{287}', '\u{0}', '\u{0}']), ('\u{a7b2}', ['\u{29d}', '\u{0}', '\u{0}']), - ('\u{a7b3}', ['\u{ab53}', '\u{0}', '\u{0}']), ('\u{a7b4}', ['\u{a7b5}', '\u{0}', '\u{0}']), - ('\u{a7b6}', ['\u{a7b7}', '\u{0}', '\u{0}']), ('\u{a7b8}', ['\u{a7b9}', '\u{0}', '\u{0}']), - ('\u{a7ba}', ['\u{a7bb}', '\u{0}', '\u{0}']), ('\u{a7bc}', ['\u{a7bd}', '\u{0}', '\u{0}']), - ('\u{a7be}', ['\u{a7bf}', '\u{0}', '\u{0}']), ('\u{a7c0}', ['\u{a7c1}', '\u{0}', '\u{0}']), - ('\u{a7c2}', ['\u{a7c3}', '\u{0}', '\u{0}']), ('\u{a7c4}', ['\u{a794}', '\u{0}', '\u{0}']), - ('\u{a7c5}', ['\u{282}', '\u{0}', '\u{0}']), ('\u{a7c6}', ['\u{1d8e}', '\u{0}', '\u{0}']), - ('\u{a7c7}', ['\u{a7c8}', '\u{0}', '\u{0}']), ('\u{a7c9}', ['\u{a7ca}', '\u{0}', '\u{0}']), - ('\u{a7cb}', ['\u{264}', '\u{0}', '\u{0}']), ('\u{a7cc}', ['\u{a7cd}', '\u{0}', '\u{0}']), - ('\u{a7ce}', ['\u{a7cf}', '\u{0}', '\u{0}']), ('\u{a7d0}', ['\u{a7d1}', '\u{0}', '\u{0}']), - ('\u{a7d2}', ['\u{a7d3}', '\u{0}', '\u{0}']), ('\u{a7d4}', ['\u{a7d5}', '\u{0}', '\u{0}']), - ('\u{a7d6}', ['\u{a7d7}', '\u{0}', '\u{0}']), ('\u{a7d8}', ['\u{a7d9}', '\u{0}', '\u{0}']), - ('\u{a7da}', ['\u{a7db}', '\u{0}', '\u{0}']), ('\u{a7dc}', ['\u{19b}', '\u{0}', '\u{0}']), - ('\u{a7f5}', ['\u{a7f6}', '\u{0}', '\u{0}']), ('\u{ff21}', ['\u{ff41}', '\u{0}', '\u{0}']), - ('\u{ff22}', ['\u{ff42}', '\u{0}', '\u{0}']), ('\u{ff23}', ['\u{ff43}', '\u{0}', '\u{0}']), - ('\u{ff24}', ['\u{ff44}', '\u{0}', '\u{0}']), ('\u{ff25}', ['\u{ff45}', '\u{0}', '\u{0}']), - ('\u{ff26}', ['\u{ff46}', '\u{0}', '\u{0}']), ('\u{ff27}', ['\u{ff47}', '\u{0}', '\u{0}']), - ('\u{ff28}', ['\u{ff48}', '\u{0}', '\u{0}']), ('\u{ff29}', ['\u{ff49}', '\u{0}', '\u{0}']), - ('\u{ff2a}', ['\u{ff4a}', '\u{0}', '\u{0}']), ('\u{ff2b}', ['\u{ff4b}', '\u{0}', '\u{0}']), - ('\u{ff2c}', ['\u{ff4c}', '\u{0}', '\u{0}']), ('\u{ff2d}', ['\u{ff4d}', '\u{0}', '\u{0}']), - ('\u{ff2e}', ['\u{ff4e}', '\u{0}', '\u{0}']), ('\u{ff2f}', ['\u{ff4f}', '\u{0}', '\u{0}']), - ('\u{ff30}', ['\u{ff50}', '\u{0}', '\u{0}']), ('\u{ff31}', ['\u{ff51}', '\u{0}', '\u{0}']), - ('\u{ff32}', ['\u{ff52}', '\u{0}', '\u{0}']), ('\u{ff33}', ['\u{ff53}', '\u{0}', '\u{0}']), - ('\u{ff34}', ['\u{ff54}', '\u{0}', '\u{0}']), ('\u{ff35}', ['\u{ff55}', '\u{0}', '\u{0}']), - ('\u{ff36}', ['\u{ff56}', '\u{0}', '\u{0}']), ('\u{ff37}', ['\u{ff57}', '\u{0}', '\u{0}']), - ('\u{ff38}', ['\u{ff58}', '\u{0}', '\u{0}']), ('\u{ff39}', ['\u{ff59}', '\u{0}', '\u{0}']), - ('\u{ff3a}', ['\u{ff5a}', '\u{0}', '\u{0}']), - ('\u{10400}', ['\u{10428}', '\u{0}', '\u{0}']), - ('\u{10401}', ['\u{10429}', '\u{0}', '\u{0}']), - ('\u{10402}', ['\u{1042a}', '\u{0}', '\u{0}']), - ('\u{10403}', ['\u{1042b}', 
'\u{0}', '\u{0}']), - ('\u{10404}', ['\u{1042c}', '\u{0}', '\u{0}']), - ('\u{10405}', ['\u{1042d}', '\u{0}', '\u{0}']), - ('\u{10406}', ['\u{1042e}', '\u{0}', '\u{0}']), - ('\u{10407}', ['\u{1042f}', '\u{0}', '\u{0}']), - ('\u{10408}', ['\u{10430}', '\u{0}', '\u{0}']), - ('\u{10409}', ['\u{10431}', '\u{0}', '\u{0}']), - ('\u{1040a}', ['\u{10432}', '\u{0}', '\u{0}']), - ('\u{1040b}', ['\u{10433}', '\u{0}', '\u{0}']), - ('\u{1040c}', ['\u{10434}', '\u{0}', '\u{0}']), - ('\u{1040d}', ['\u{10435}', '\u{0}', '\u{0}']), - ('\u{1040e}', ['\u{10436}', '\u{0}', '\u{0}']), - ('\u{1040f}', ['\u{10437}', '\u{0}', '\u{0}']), - ('\u{10410}', ['\u{10438}', '\u{0}', '\u{0}']), - ('\u{10411}', ['\u{10439}', '\u{0}', '\u{0}']), - ('\u{10412}', ['\u{1043a}', '\u{0}', '\u{0}']), - ('\u{10413}', ['\u{1043b}', '\u{0}', '\u{0}']), - ('\u{10414}', ['\u{1043c}', '\u{0}', '\u{0}']), - ('\u{10415}', ['\u{1043d}', '\u{0}', '\u{0}']), - ('\u{10416}', ['\u{1043e}', '\u{0}', '\u{0}']), - ('\u{10417}', ['\u{1043f}', '\u{0}', '\u{0}']), - ('\u{10418}', ['\u{10440}', '\u{0}', '\u{0}']), - ('\u{10419}', ['\u{10441}', '\u{0}', '\u{0}']), - ('\u{1041a}', ['\u{10442}', '\u{0}', '\u{0}']), - ('\u{1041b}', ['\u{10443}', '\u{0}', '\u{0}']), - ('\u{1041c}', ['\u{10444}', '\u{0}', '\u{0}']), - ('\u{1041d}', ['\u{10445}', '\u{0}', '\u{0}']), - ('\u{1041e}', ['\u{10446}', '\u{0}', '\u{0}']), - ('\u{1041f}', ['\u{10447}', '\u{0}', '\u{0}']), - ('\u{10420}', ['\u{10448}', '\u{0}', '\u{0}']), - ('\u{10421}', ['\u{10449}', '\u{0}', '\u{0}']), - ('\u{10422}', ['\u{1044a}', '\u{0}', '\u{0}']), - ('\u{10423}', ['\u{1044b}', '\u{0}', '\u{0}']), - ('\u{10424}', ['\u{1044c}', '\u{0}', '\u{0}']), - ('\u{10425}', ['\u{1044d}', '\u{0}', '\u{0}']), - ('\u{10426}', ['\u{1044e}', '\u{0}', '\u{0}']), - ('\u{10427}', ['\u{1044f}', '\u{0}', '\u{0}']), - ('\u{104b0}', ['\u{104d8}', '\u{0}', '\u{0}']), - ('\u{104b1}', ['\u{104d9}', '\u{0}', '\u{0}']), - ('\u{104b2}', ['\u{104da}', '\u{0}', '\u{0}']), - ('\u{104b3}', ['\u{104db}', '\u{0}', '\u{0}']), - ('\u{104b4}', ['\u{104dc}', '\u{0}', '\u{0}']), - ('\u{104b5}', ['\u{104dd}', '\u{0}', '\u{0}']), - ('\u{104b6}', ['\u{104de}', '\u{0}', '\u{0}']), - ('\u{104b7}', ['\u{104df}', '\u{0}', '\u{0}']), - ('\u{104b8}', ['\u{104e0}', '\u{0}', '\u{0}']), - ('\u{104b9}', ['\u{104e1}', '\u{0}', '\u{0}']), - ('\u{104ba}', ['\u{104e2}', '\u{0}', '\u{0}']), - ('\u{104bb}', ['\u{104e3}', '\u{0}', '\u{0}']), - ('\u{104bc}', ['\u{104e4}', '\u{0}', '\u{0}']), - ('\u{104bd}', ['\u{104e5}', '\u{0}', '\u{0}']), - ('\u{104be}', ['\u{104e6}', '\u{0}', '\u{0}']), - ('\u{104bf}', ['\u{104e7}', '\u{0}', '\u{0}']), - ('\u{104c0}', ['\u{104e8}', '\u{0}', '\u{0}']), - ('\u{104c1}', ['\u{104e9}', '\u{0}', '\u{0}']), - ('\u{104c2}', ['\u{104ea}', '\u{0}', '\u{0}']), - ('\u{104c3}', ['\u{104eb}', '\u{0}', '\u{0}']), - ('\u{104c4}', ['\u{104ec}', '\u{0}', '\u{0}']), - ('\u{104c5}', ['\u{104ed}', '\u{0}', '\u{0}']), - ('\u{104c6}', ['\u{104ee}', '\u{0}', '\u{0}']), - ('\u{104c7}', ['\u{104ef}', '\u{0}', '\u{0}']), - ('\u{104c8}', ['\u{104f0}', '\u{0}', '\u{0}']), - ('\u{104c9}', ['\u{104f1}', '\u{0}', '\u{0}']), - ('\u{104ca}', ['\u{104f2}', '\u{0}', '\u{0}']), - ('\u{104cb}', ['\u{104f3}', '\u{0}', '\u{0}']), - ('\u{104cc}', ['\u{104f4}', '\u{0}', '\u{0}']), - ('\u{104cd}', ['\u{104f5}', '\u{0}', '\u{0}']), - ('\u{104ce}', ['\u{104f6}', '\u{0}', '\u{0}']), - ('\u{104cf}', ['\u{104f7}', '\u{0}', '\u{0}']), - ('\u{104d0}', ['\u{104f8}', '\u{0}', '\u{0}']), - ('\u{104d1}', ['\u{104f9}', '\u{0}', '\u{0}']), - ('\u{104d2}', ['\u{104fa}', 
'\u{0}', '\u{0}']), - ('\u{104d3}', ['\u{104fb}', '\u{0}', '\u{0}']), - ('\u{10570}', ['\u{10597}', '\u{0}', '\u{0}']), - ('\u{10571}', ['\u{10598}', '\u{0}', '\u{0}']), - ('\u{10572}', ['\u{10599}', '\u{0}', '\u{0}']), - ('\u{10573}', ['\u{1059a}', '\u{0}', '\u{0}']), - ('\u{10574}', ['\u{1059b}', '\u{0}', '\u{0}']), - ('\u{10575}', ['\u{1059c}', '\u{0}', '\u{0}']), - ('\u{10576}', ['\u{1059d}', '\u{0}', '\u{0}']), - ('\u{10577}', ['\u{1059e}', '\u{0}', '\u{0}']), - ('\u{10578}', ['\u{1059f}', '\u{0}', '\u{0}']), - ('\u{10579}', ['\u{105a0}', '\u{0}', '\u{0}']), - ('\u{1057a}', ['\u{105a1}', '\u{0}', '\u{0}']), - ('\u{1057c}', ['\u{105a3}', '\u{0}', '\u{0}']), - ('\u{1057d}', ['\u{105a4}', '\u{0}', '\u{0}']), - ('\u{1057e}', ['\u{105a5}', '\u{0}', '\u{0}']), - ('\u{1057f}', ['\u{105a6}', '\u{0}', '\u{0}']), - ('\u{10580}', ['\u{105a7}', '\u{0}', '\u{0}']), - ('\u{10581}', ['\u{105a8}', '\u{0}', '\u{0}']), - ('\u{10582}', ['\u{105a9}', '\u{0}', '\u{0}']), - ('\u{10583}', ['\u{105aa}', '\u{0}', '\u{0}']), - ('\u{10584}', ['\u{105ab}', '\u{0}', '\u{0}']), - ('\u{10585}', ['\u{105ac}', '\u{0}', '\u{0}']), - ('\u{10586}', ['\u{105ad}', '\u{0}', '\u{0}']), - ('\u{10587}', ['\u{105ae}', '\u{0}', '\u{0}']), - ('\u{10588}', ['\u{105af}', '\u{0}', '\u{0}']), - ('\u{10589}', ['\u{105b0}', '\u{0}', '\u{0}']), - ('\u{1058a}', ['\u{105b1}', '\u{0}', '\u{0}']), - ('\u{1058c}', ['\u{105b3}', '\u{0}', '\u{0}']), - ('\u{1058d}', ['\u{105b4}', '\u{0}', '\u{0}']), - ('\u{1058e}', ['\u{105b5}', '\u{0}', '\u{0}']), - ('\u{1058f}', ['\u{105b6}', '\u{0}', '\u{0}']), - ('\u{10590}', ['\u{105b7}', '\u{0}', '\u{0}']), - ('\u{10591}', ['\u{105b8}', '\u{0}', '\u{0}']), - ('\u{10592}', ['\u{105b9}', '\u{0}', '\u{0}']), - ('\u{10594}', ['\u{105bb}', '\u{0}', '\u{0}']), - ('\u{10595}', ['\u{105bc}', '\u{0}', '\u{0}']), - ('\u{10c80}', ['\u{10cc0}', '\u{0}', '\u{0}']), - ('\u{10c81}', ['\u{10cc1}', '\u{0}', '\u{0}']), - ('\u{10c82}', ['\u{10cc2}', '\u{0}', '\u{0}']), - ('\u{10c83}', ['\u{10cc3}', '\u{0}', '\u{0}']), - ('\u{10c84}', ['\u{10cc4}', '\u{0}', '\u{0}']), - ('\u{10c85}', ['\u{10cc5}', '\u{0}', '\u{0}']), - ('\u{10c86}', ['\u{10cc6}', '\u{0}', '\u{0}']), - ('\u{10c87}', ['\u{10cc7}', '\u{0}', '\u{0}']), - ('\u{10c88}', ['\u{10cc8}', '\u{0}', '\u{0}']), - ('\u{10c89}', ['\u{10cc9}', '\u{0}', '\u{0}']), - ('\u{10c8a}', ['\u{10cca}', '\u{0}', '\u{0}']), - ('\u{10c8b}', ['\u{10ccb}', '\u{0}', '\u{0}']), - ('\u{10c8c}', ['\u{10ccc}', '\u{0}', '\u{0}']), - ('\u{10c8d}', ['\u{10ccd}', '\u{0}', '\u{0}']), - ('\u{10c8e}', ['\u{10cce}', '\u{0}', '\u{0}']), - ('\u{10c8f}', ['\u{10ccf}', '\u{0}', '\u{0}']), - ('\u{10c90}', ['\u{10cd0}', '\u{0}', '\u{0}']), - ('\u{10c91}', ['\u{10cd1}', '\u{0}', '\u{0}']), - ('\u{10c92}', ['\u{10cd2}', '\u{0}', '\u{0}']), - ('\u{10c93}', ['\u{10cd3}', '\u{0}', '\u{0}']), - ('\u{10c94}', ['\u{10cd4}', '\u{0}', '\u{0}']), - ('\u{10c95}', ['\u{10cd5}', '\u{0}', '\u{0}']), - ('\u{10c96}', ['\u{10cd6}', '\u{0}', '\u{0}']), - ('\u{10c97}', ['\u{10cd7}', '\u{0}', '\u{0}']), - ('\u{10c98}', ['\u{10cd8}', '\u{0}', '\u{0}']), - ('\u{10c99}', ['\u{10cd9}', '\u{0}', '\u{0}']), - ('\u{10c9a}', ['\u{10cda}', '\u{0}', '\u{0}']), - ('\u{10c9b}', ['\u{10cdb}', '\u{0}', '\u{0}']), - ('\u{10c9c}', ['\u{10cdc}', '\u{0}', '\u{0}']), - ('\u{10c9d}', ['\u{10cdd}', '\u{0}', '\u{0}']), - ('\u{10c9e}', ['\u{10cde}', '\u{0}', '\u{0}']), - ('\u{10c9f}', ['\u{10cdf}', '\u{0}', '\u{0}']), - ('\u{10ca0}', ['\u{10ce0}', '\u{0}', '\u{0}']), - ('\u{10ca1}', ['\u{10ce1}', '\u{0}', '\u{0}']), - ('\u{10ca2}', ['\u{10ce2}', 
'\u{0}', '\u{0}']), - ('\u{10ca3}', ['\u{10ce3}', '\u{0}', '\u{0}']), - ('\u{10ca4}', ['\u{10ce4}', '\u{0}', '\u{0}']), - ('\u{10ca5}', ['\u{10ce5}', '\u{0}', '\u{0}']), - ('\u{10ca6}', ['\u{10ce6}', '\u{0}', '\u{0}']), - ('\u{10ca7}', ['\u{10ce7}', '\u{0}', '\u{0}']), - ('\u{10ca8}', ['\u{10ce8}', '\u{0}', '\u{0}']), - ('\u{10ca9}', ['\u{10ce9}', '\u{0}', '\u{0}']), - ('\u{10caa}', ['\u{10cea}', '\u{0}', '\u{0}']), - ('\u{10cab}', ['\u{10ceb}', '\u{0}', '\u{0}']), - ('\u{10cac}', ['\u{10cec}', '\u{0}', '\u{0}']), - ('\u{10cad}', ['\u{10ced}', '\u{0}', '\u{0}']), - ('\u{10cae}', ['\u{10cee}', '\u{0}', '\u{0}']), - ('\u{10caf}', ['\u{10cef}', '\u{0}', '\u{0}']), - ('\u{10cb0}', ['\u{10cf0}', '\u{0}', '\u{0}']), - ('\u{10cb1}', ['\u{10cf1}', '\u{0}', '\u{0}']), - ('\u{10cb2}', ['\u{10cf2}', '\u{0}', '\u{0}']), - ('\u{10d50}', ['\u{10d70}', '\u{0}', '\u{0}']), - ('\u{10d51}', ['\u{10d71}', '\u{0}', '\u{0}']), - ('\u{10d52}', ['\u{10d72}', '\u{0}', '\u{0}']), - ('\u{10d53}', ['\u{10d73}', '\u{0}', '\u{0}']), - ('\u{10d54}', ['\u{10d74}', '\u{0}', '\u{0}']), - ('\u{10d55}', ['\u{10d75}', '\u{0}', '\u{0}']), - ('\u{10d56}', ['\u{10d76}', '\u{0}', '\u{0}']), - ('\u{10d57}', ['\u{10d77}', '\u{0}', '\u{0}']), - ('\u{10d58}', ['\u{10d78}', '\u{0}', '\u{0}']), - ('\u{10d59}', ['\u{10d79}', '\u{0}', '\u{0}']), - ('\u{10d5a}', ['\u{10d7a}', '\u{0}', '\u{0}']), - ('\u{10d5b}', ['\u{10d7b}', '\u{0}', '\u{0}']), - ('\u{10d5c}', ['\u{10d7c}', '\u{0}', '\u{0}']), - ('\u{10d5d}', ['\u{10d7d}', '\u{0}', '\u{0}']), - ('\u{10d5e}', ['\u{10d7e}', '\u{0}', '\u{0}']), - ('\u{10d5f}', ['\u{10d7f}', '\u{0}', '\u{0}']), - ('\u{10d60}', ['\u{10d80}', '\u{0}', '\u{0}']), - ('\u{10d61}', ['\u{10d81}', '\u{0}', '\u{0}']), - ('\u{10d62}', ['\u{10d82}', '\u{0}', '\u{0}']), - ('\u{10d63}', ['\u{10d83}', '\u{0}', '\u{0}']), - ('\u{10d64}', ['\u{10d84}', '\u{0}', '\u{0}']), - ('\u{10d65}', ['\u{10d85}', '\u{0}', '\u{0}']), - ('\u{118a0}', ['\u{118c0}', '\u{0}', '\u{0}']), - ('\u{118a1}', ['\u{118c1}', '\u{0}', '\u{0}']), - ('\u{118a2}', ['\u{118c2}', '\u{0}', '\u{0}']), - ('\u{118a3}', ['\u{118c3}', '\u{0}', '\u{0}']), - ('\u{118a4}', ['\u{118c4}', '\u{0}', '\u{0}']), - ('\u{118a5}', ['\u{118c5}', '\u{0}', '\u{0}']), - ('\u{118a6}', ['\u{118c6}', '\u{0}', '\u{0}']), - ('\u{118a7}', ['\u{118c7}', '\u{0}', '\u{0}']), - ('\u{118a8}', ['\u{118c8}', '\u{0}', '\u{0}']), - ('\u{118a9}', ['\u{118c9}', '\u{0}', '\u{0}']), - ('\u{118aa}', ['\u{118ca}', '\u{0}', '\u{0}']), - ('\u{118ab}', ['\u{118cb}', '\u{0}', '\u{0}']), - ('\u{118ac}', ['\u{118cc}', '\u{0}', '\u{0}']), - ('\u{118ad}', ['\u{118cd}', '\u{0}', '\u{0}']), - ('\u{118ae}', ['\u{118ce}', '\u{0}', '\u{0}']), - ('\u{118af}', ['\u{118cf}', '\u{0}', '\u{0}']), - ('\u{118b0}', ['\u{118d0}', '\u{0}', '\u{0}']), - ('\u{118b1}', ['\u{118d1}', '\u{0}', '\u{0}']), - ('\u{118b2}', ['\u{118d2}', '\u{0}', '\u{0}']), - ('\u{118b3}', ['\u{118d3}', '\u{0}', '\u{0}']), - ('\u{118b4}', ['\u{118d4}', '\u{0}', '\u{0}']), - ('\u{118b5}', ['\u{118d5}', '\u{0}', '\u{0}']), - ('\u{118b6}', ['\u{118d6}', '\u{0}', '\u{0}']), - ('\u{118b7}', ['\u{118d7}', '\u{0}', '\u{0}']), - ('\u{118b8}', ['\u{118d8}', '\u{0}', '\u{0}']), - ('\u{118b9}', ['\u{118d9}', '\u{0}', '\u{0}']), - ('\u{118ba}', ['\u{118da}', '\u{0}', '\u{0}']), - ('\u{118bb}', ['\u{118db}', '\u{0}', '\u{0}']), - ('\u{118bc}', ['\u{118dc}', '\u{0}', '\u{0}']), - ('\u{118bd}', ['\u{118dd}', '\u{0}', '\u{0}']), - ('\u{118be}', ['\u{118de}', '\u{0}', '\u{0}']), - ('\u{118bf}', ['\u{118df}', '\u{0}', '\u{0}']), - ('\u{16e40}', ['\u{16e60}', 
'\u{0}', '\u{0}']), - ('\u{16e41}', ['\u{16e61}', '\u{0}', '\u{0}']), - ('\u{16e42}', ['\u{16e62}', '\u{0}', '\u{0}']), - ('\u{16e43}', ['\u{16e63}', '\u{0}', '\u{0}']), - ('\u{16e44}', ['\u{16e64}', '\u{0}', '\u{0}']), - ('\u{16e45}', ['\u{16e65}', '\u{0}', '\u{0}']), - ('\u{16e46}', ['\u{16e66}', '\u{0}', '\u{0}']), - ('\u{16e47}', ['\u{16e67}', '\u{0}', '\u{0}']), - ('\u{16e48}', ['\u{16e68}', '\u{0}', '\u{0}']), - ('\u{16e49}', ['\u{16e69}', '\u{0}', '\u{0}']), - ('\u{16e4a}', ['\u{16e6a}', '\u{0}', '\u{0}']), - ('\u{16e4b}', ['\u{16e6b}', '\u{0}', '\u{0}']), - ('\u{16e4c}', ['\u{16e6c}', '\u{0}', '\u{0}']), - ('\u{16e4d}', ['\u{16e6d}', '\u{0}', '\u{0}']), - ('\u{16e4e}', ['\u{16e6e}', '\u{0}', '\u{0}']), - ('\u{16e4f}', ['\u{16e6f}', '\u{0}', '\u{0}']), - ('\u{16e50}', ['\u{16e70}', '\u{0}', '\u{0}']), - ('\u{16e51}', ['\u{16e71}', '\u{0}', '\u{0}']), - ('\u{16e52}', ['\u{16e72}', '\u{0}', '\u{0}']), - ('\u{16e53}', ['\u{16e73}', '\u{0}', '\u{0}']), - ('\u{16e54}', ['\u{16e74}', '\u{0}', '\u{0}']), - ('\u{16e55}', ['\u{16e75}', '\u{0}', '\u{0}']), - ('\u{16e56}', ['\u{16e76}', '\u{0}', '\u{0}']), - ('\u{16e57}', ['\u{16e77}', '\u{0}', '\u{0}']), - ('\u{16e58}', ['\u{16e78}', '\u{0}', '\u{0}']), - ('\u{16e59}', ['\u{16e79}', '\u{0}', '\u{0}']), - ('\u{16e5a}', ['\u{16e7a}', '\u{0}', '\u{0}']), - ('\u{16e5b}', ['\u{16e7b}', '\u{0}', '\u{0}']), - ('\u{16e5c}', ['\u{16e7c}', '\u{0}', '\u{0}']), - ('\u{16e5d}', ['\u{16e7d}', '\u{0}', '\u{0}']), - ('\u{16e5e}', ['\u{16e7e}', '\u{0}', '\u{0}']), - ('\u{16e5f}', ['\u{16e7f}', '\u{0}', '\u{0}']), - ('\u{16ea0}', ['\u{16ebb}', '\u{0}', '\u{0}']), - ('\u{16ea1}', ['\u{16ebc}', '\u{0}', '\u{0}']), - ('\u{16ea2}', ['\u{16ebd}', '\u{0}', '\u{0}']), - ('\u{16ea3}', ['\u{16ebe}', '\u{0}', '\u{0}']), - ('\u{16ea4}', ['\u{16ebf}', '\u{0}', '\u{0}']), - ('\u{16ea5}', ['\u{16ec0}', '\u{0}', '\u{0}']), - ('\u{16ea6}', ['\u{16ec1}', '\u{0}', '\u{0}']), - ('\u{16ea7}', ['\u{16ec2}', '\u{0}', '\u{0}']), - ('\u{16ea8}', ['\u{16ec3}', '\u{0}', '\u{0}']), - ('\u{16ea9}', ['\u{16ec4}', '\u{0}', '\u{0}']), - ('\u{16eaa}', ['\u{16ec5}', '\u{0}', '\u{0}']), - ('\u{16eab}', ['\u{16ec6}', '\u{0}', '\u{0}']), - ('\u{16eac}', ['\u{16ec7}', '\u{0}', '\u{0}']), - ('\u{16ead}', ['\u{16ec8}', '\u{0}', '\u{0}']), - ('\u{16eae}', ['\u{16ec9}', '\u{0}', '\u{0}']), - ('\u{16eaf}', ['\u{16eca}', '\u{0}', '\u{0}']), - ('\u{16eb0}', ['\u{16ecb}', '\u{0}', '\u{0}']), - ('\u{16eb1}', ['\u{16ecc}', '\u{0}', '\u{0}']), - ('\u{16eb2}', ['\u{16ecd}', '\u{0}', '\u{0}']), - ('\u{16eb3}', ['\u{16ece}', '\u{0}', '\u{0}']), - ('\u{16eb4}', ['\u{16ecf}', '\u{0}', '\u{0}']), - ('\u{16eb5}', ['\u{16ed0}', '\u{0}', '\u{0}']), - ('\u{16eb6}', ['\u{16ed1}', '\u{0}', '\u{0}']), - ('\u{16eb7}', ['\u{16ed2}', '\u{0}', '\u{0}']), - ('\u{16eb8}', ['\u{16ed3}', '\u{0}', '\u{0}']), - ('\u{1e900}', ['\u{1e922}', '\u{0}', '\u{0}']), - ('\u{1e901}', ['\u{1e923}', '\u{0}', '\u{0}']), - ('\u{1e902}', ['\u{1e924}', '\u{0}', '\u{0}']), - ('\u{1e903}', ['\u{1e925}', '\u{0}', '\u{0}']), - ('\u{1e904}', ['\u{1e926}', '\u{0}', '\u{0}']), - ('\u{1e905}', ['\u{1e927}', '\u{0}', '\u{0}']), - ('\u{1e906}', ['\u{1e928}', '\u{0}', '\u{0}']), - ('\u{1e907}', ['\u{1e929}', '\u{0}', '\u{0}']), - ('\u{1e908}', ['\u{1e92a}', '\u{0}', '\u{0}']), - ('\u{1e909}', ['\u{1e92b}', '\u{0}', '\u{0}']), - ('\u{1e90a}', ['\u{1e92c}', '\u{0}', '\u{0}']), - ('\u{1e90b}', ['\u{1e92d}', '\u{0}', '\u{0}']), - ('\u{1e90c}', ['\u{1e92e}', '\u{0}', '\u{0}']), - ('\u{1e90d}', ['\u{1e92f}', '\u{0}', '\u{0}']), - ('\u{1e90e}', ['\u{1e930}', 
'\u{0}', '\u{0}']), - ('\u{1e90f}', ['\u{1e931}', '\u{0}', '\u{0}']), - ('\u{1e910}', ['\u{1e932}', '\u{0}', '\u{0}']), - ('\u{1e911}', ['\u{1e933}', '\u{0}', '\u{0}']), - ('\u{1e912}', ['\u{1e934}', '\u{0}', '\u{0}']), - ('\u{1e913}', ['\u{1e935}', '\u{0}', '\u{0}']), - ('\u{1e914}', ['\u{1e936}', '\u{0}', '\u{0}']), - ('\u{1e915}', ['\u{1e937}', '\u{0}', '\u{0}']), - ('\u{1e916}', ['\u{1e938}', '\u{0}', '\u{0}']), - ('\u{1e917}', ['\u{1e939}', '\u{0}', '\u{0}']), - ('\u{1e918}', ['\u{1e93a}', '\u{0}', '\u{0}']), - ('\u{1e919}', ['\u{1e93b}', '\u{0}', '\u{0}']), - ('\u{1e91a}', ['\u{1e93c}', '\u{0}', '\u{0}']), - ('\u{1e91b}', ['\u{1e93d}', '\u{0}', '\u{0}']), - ('\u{1e91c}', ['\u{1e93e}', '\u{0}', '\u{0}']), - ('\u{1e91d}', ['\u{1e93f}', '\u{0}', '\u{0}']), - ('\u{1e91e}', ['\u{1e940}', '\u{0}', '\u{0}']), - ('\u{1e91f}', ['\u{1e941}', '\u{0}', '\u{0}']), - ('\u{1e920}', ['\u{1e942}', '\u{0}', '\u{0}']), - ('\u{1e921}', ['\u{1e943}', '\u{0}', '\u{0}']), -]; - -#[rustfmt::skip] -pub(super) static TO_UPPER: &[(char, [char; 3]); 1580] = &[ - ('\u{61}', ['\u{41}', '\u{0}', '\u{0}']), ('\u{62}', ['\u{42}', '\u{0}', '\u{0}']), - ('\u{63}', ['\u{43}', '\u{0}', '\u{0}']), ('\u{64}', ['\u{44}', '\u{0}', '\u{0}']), - ('\u{65}', ['\u{45}', '\u{0}', '\u{0}']), ('\u{66}', ['\u{46}', '\u{0}', '\u{0}']), - ('\u{67}', ['\u{47}', '\u{0}', '\u{0}']), ('\u{68}', ['\u{48}', '\u{0}', '\u{0}']), - ('\u{69}', ['\u{49}', '\u{0}', '\u{0}']), ('\u{6a}', ['\u{4a}', '\u{0}', '\u{0}']), - ('\u{6b}', ['\u{4b}', '\u{0}', '\u{0}']), ('\u{6c}', ['\u{4c}', '\u{0}', '\u{0}']), - ('\u{6d}', ['\u{4d}', '\u{0}', '\u{0}']), ('\u{6e}', ['\u{4e}', '\u{0}', '\u{0}']), - ('\u{6f}', ['\u{4f}', '\u{0}', '\u{0}']), ('\u{70}', ['\u{50}', '\u{0}', '\u{0}']), - ('\u{71}', ['\u{51}', '\u{0}', '\u{0}']), ('\u{72}', ['\u{52}', '\u{0}', '\u{0}']), - ('\u{73}', ['\u{53}', '\u{0}', '\u{0}']), ('\u{74}', ['\u{54}', '\u{0}', '\u{0}']), - ('\u{75}', ['\u{55}', '\u{0}', '\u{0}']), ('\u{76}', ['\u{56}', '\u{0}', '\u{0}']), - ('\u{77}', ['\u{57}', '\u{0}', '\u{0}']), ('\u{78}', ['\u{58}', '\u{0}', '\u{0}']), - ('\u{79}', ['\u{59}', '\u{0}', '\u{0}']), ('\u{7a}', ['\u{5a}', '\u{0}', '\u{0}']), - ('\u{b5}', ['\u{39c}', '\u{0}', '\u{0}']), ('\u{df}', ['\u{53}', '\u{53}', '\u{0}']), - ('\u{e0}', ['\u{c0}', '\u{0}', '\u{0}']), ('\u{e1}', ['\u{c1}', '\u{0}', '\u{0}']), - ('\u{e2}', ['\u{c2}', '\u{0}', '\u{0}']), ('\u{e3}', ['\u{c3}', '\u{0}', '\u{0}']), - ('\u{e4}', ['\u{c4}', '\u{0}', '\u{0}']), ('\u{e5}', ['\u{c5}', '\u{0}', '\u{0}']), - ('\u{e6}', ['\u{c6}', '\u{0}', '\u{0}']), ('\u{e7}', ['\u{c7}', '\u{0}', '\u{0}']), - ('\u{e8}', ['\u{c8}', '\u{0}', '\u{0}']), ('\u{e9}', ['\u{c9}', '\u{0}', '\u{0}']), - ('\u{ea}', ['\u{ca}', '\u{0}', '\u{0}']), ('\u{eb}', ['\u{cb}', '\u{0}', '\u{0}']), - ('\u{ec}', ['\u{cc}', '\u{0}', '\u{0}']), ('\u{ed}', ['\u{cd}', '\u{0}', '\u{0}']), - ('\u{ee}', ['\u{ce}', '\u{0}', '\u{0}']), ('\u{ef}', ['\u{cf}', '\u{0}', '\u{0}']), - ('\u{f0}', ['\u{d0}', '\u{0}', '\u{0}']), ('\u{f1}', ['\u{d1}', '\u{0}', '\u{0}']), - ('\u{f2}', ['\u{d2}', '\u{0}', '\u{0}']), ('\u{f3}', ['\u{d3}', '\u{0}', '\u{0}']), - ('\u{f4}', ['\u{d4}', '\u{0}', '\u{0}']), ('\u{f5}', ['\u{d5}', '\u{0}', '\u{0}']), - ('\u{f6}', ['\u{d6}', '\u{0}', '\u{0}']), ('\u{f8}', ['\u{d8}', '\u{0}', '\u{0}']), - ('\u{f9}', ['\u{d9}', '\u{0}', '\u{0}']), ('\u{fa}', ['\u{da}', '\u{0}', '\u{0}']), - ('\u{fb}', ['\u{db}', '\u{0}', '\u{0}']), ('\u{fc}', ['\u{dc}', '\u{0}', '\u{0}']), - ('\u{fd}', ['\u{dd}', '\u{0}', '\u{0}']), ('\u{fe}', ['\u{de}', '\u{0}', '\u{0}']), - 
('\u{ff}', ['\u{178}', '\u{0}', '\u{0}']), ('\u{101}', ['\u{100}', '\u{0}', '\u{0}']), - ('\u{103}', ['\u{102}', '\u{0}', '\u{0}']), ('\u{105}', ['\u{104}', '\u{0}', '\u{0}']), - ('\u{107}', ['\u{106}', '\u{0}', '\u{0}']), ('\u{109}', ['\u{108}', '\u{0}', '\u{0}']), - ('\u{10b}', ['\u{10a}', '\u{0}', '\u{0}']), ('\u{10d}', ['\u{10c}', '\u{0}', '\u{0}']), - ('\u{10f}', ['\u{10e}', '\u{0}', '\u{0}']), ('\u{111}', ['\u{110}', '\u{0}', '\u{0}']), - ('\u{113}', ['\u{112}', '\u{0}', '\u{0}']), ('\u{115}', ['\u{114}', '\u{0}', '\u{0}']), - ('\u{117}', ['\u{116}', '\u{0}', '\u{0}']), ('\u{119}', ['\u{118}', '\u{0}', '\u{0}']), - ('\u{11b}', ['\u{11a}', '\u{0}', '\u{0}']), ('\u{11d}', ['\u{11c}', '\u{0}', '\u{0}']), - ('\u{11f}', ['\u{11e}', '\u{0}', '\u{0}']), ('\u{121}', ['\u{120}', '\u{0}', '\u{0}']), - ('\u{123}', ['\u{122}', '\u{0}', '\u{0}']), ('\u{125}', ['\u{124}', '\u{0}', '\u{0}']), - ('\u{127}', ['\u{126}', '\u{0}', '\u{0}']), ('\u{129}', ['\u{128}', '\u{0}', '\u{0}']), - ('\u{12b}', ['\u{12a}', '\u{0}', '\u{0}']), ('\u{12d}', ['\u{12c}', '\u{0}', '\u{0}']), - ('\u{12f}', ['\u{12e}', '\u{0}', '\u{0}']), ('\u{131}', ['\u{49}', '\u{0}', '\u{0}']), - ('\u{133}', ['\u{132}', '\u{0}', '\u{0}']), ('\u{135}', ['\u{134}', '\u{0}', '\u{0}']), - ('\u{137}', ['\u{136}', '\u{0}', '\u{0}']), ('\u{13a}', ['\u{139}', '\u{0}', '\u{0}']), - ('\u{13c}', ['\u{13b}', '\u{0}', '\u{0}']), ('\u{13e}', ['\u{13d}', '\u{0}', '\u{0}']), - ('\u{140}', ['\u{13f}', '\u{0}', '\u{0}']), ('\u{142}', ['\u{141}', '\u{0}', '\u{0}']), - ('\u{144}', ['\u{143}', '\u{0}', '\u{0}']), ('\u{146}', ['\u{145}', '\u{0}', '\u{0}']), - ('\u{148}', ['\u{147}', '\u{0}', '\u{0}']), ('\u{149}', ['\u{2bc}', '\u{4e}', '\u{0}']), - ('\u{14b}', ['\u{14a}', '\u{0}', '\u{0}']), ('\u{14d}', ['\u{14c}', '\u{0}', '\u{0}']), - ('\u{14f}', ['\u{14e}', '\u{0}', '\u{0}']), ('\u{151}', ['\u{150}', '\u{0}', '\u{0}']), - ('\u{153}', ['\u{152}', '\u{0}', '\u{0}']), ('\u{155}', ['\u{154}', '\u{0}', '\u{0}']), - ('\u{157}', ['\u{156}', '\u{0}', '\u{0}']), ('\u{159}', ['\u{158}', '\u{0}', '\u{0}']), - ('\u{15b}', ['\u{15a}', '\u{0}', '\u{0}']), ('\u{15d}', ['\u{15c}', '\u{0}', '\u{0}']), - ('\u{15f}', ['\u{15e}', '\u{0}', '\u{0}']), ('\u{161}', ['\u{160}', '\u{0}', '\u{0}']), - ('\u{163}', ['\u{162}', '\u{0}', '\u{0}']), ('\u{165}', ['\u{164}', '\u{0}', '\u{0}']), - ('\u{167}', ['\u{166}', '\u{0}', '\u{0}']), ('\u{169}', ['\u{168}', '\u{0}', '\u{0}']), - ('\u{16b}', ['\u{16a}', '\u{0}', '\u{0}']), ('\u{16d}', ['\u{16c}', '\u{0}', '\u{0}']), - ('\u{16f}', ['\u{16e}', '\u{0}', '\u{0}']), ('\u{171}', ['\u{170}', '\u{0}', '\u{0}']), - ('\u{173}', ['\u{172}', '\u{0}', '\u{0}']), ('\u{175}', ['\u{174}', '\u{0}', '\u{0}']), - ('\u{177}', ['\u{176}', '\u{0}', '\u{0}']), ('\u{17a}', ['\u{179}', '\u{0}', '\u{0}']), - ('\u{17c}', ['\u{17b}', '\u{0}', '\u{0}']), ('\u{17e}', ['\u{17d}', '\u{0}', '\u{0}']), - ('\u{17f}', ['\u{53}', '\u{0}', '\u{0}']), ('\u{180}', ['\u{243}', '\u{0}', '\u{0}']), - ('\u{183}', ['\u{182}', '\u{0}', '\u{0}']), ('\u{185}', ['\u{184}', '\u{0}', '\u{0}']), - ('\u{188}', ['\u{187}', '\u{0}', '\u{0}']), ('\u{18c}', ['\u{18b}', '\u{0}', '\u{0}']), - ('\u{192}', ['\u{191}', '\u{0}', '\u{0}']), ('\u{195}', ['\u{1f6}', '\u{0}', '\u{0}']), - ('\u{199}', ['\u{198}', '\u{0}', '\u{0}']), ('\u{19a}', ['\u{23d}', '\u{0}', '\u{0}']), - ('\u{19b}', ['\u{a7dc}', '\u{0}', '\u{0}']), ('\u{19e}', ['\u{220}', '\u{0}', '\u{0}']), - ('\u{1a1}', ['\u{1a0}', '\u{0}', '\u{0}']), ('\u{1a3}', ['\u{1a2}', '\u{0}', '\u{0}']), - ('\u{1a5}', ['\u{1a4}', '\u{0}', '\u{0}']), 
('\u{1a8}', ['\u{1a7}', '\u{0}', '\u{0}']), - ('\u{1ad}', ['\u{1ac}', '\u{0}', '\u{0}']), ('\u{1b0}', ['\u{1af}', '\u{0}', '\u{0}']), - ('\u{1b4}', ['\u{1b3}', '\u{0}', '\u{0}']), ('\u{1b6}', ['\u{1b5}', '\u{0}', '\u{0}']), - ('\u{1b9}', ['\u{1b8}', '\u{0}', '\u{0}']), ('\u{1bd}', ['\u{1bc}', '\u{0}', '\u{0}']), - ('\u{1bf}', ['\u{1f7}', '\u{0}', '\u{0}']), ('\u{1c5}', ['\u{1c4}', '\u{0}', '\u{0}']), - ('\u{1c6}', ['\u{1c4}', '\u{0}', '\u{0}']), ('\u{1c8}', ['\u{1c7}', '\u{0}', '\u{0}']), - ('\u{1c9}', ['\u{1c7}', '\u{0}', '\u{0}']), ('\u{1cb}', ['\u{1ca}', '\u{0}', '\u{0}']), - ('\u{1cc}', ['\u{1ca}', '\u{0}', '\u{0}']), ('\u{1ce}', ['\u{1cd}', '\u{0}', '\u{0}']), - ('\u{1d0}', ['\u{1cf}', '\u{0}', '\u{0}']), ('\u{1d2}', ['\u{1d1}', '\u{0}', '\u{0}']), - ('\u{1d4}', ['\u{1d3}', '\u{0}', '\u{0}']), ('\u{1d6}', ['\u{1d5}', '\u{0}', '\u{0}']), - ('\u{1d8}', ['\u{1d7}', '\u{0}', '\u{0}']), ('\u{1da}', ['\u{1d9}', '\u{0}', '\u{0}']), - ('\u{1dc}', ['\u{1db}', '\u{0}', '\u{0}']), ('\u{1dd}', ['\u{18e}', '\u{0}', '\u{0}']), - ('\u{1df}', ['\u{1de}', '\u{0}', '\u{0}']), ('\u{1e1}', ['\u{1e0}', '\u{0}', '\u{0}']), - ('\u{1e3}', ['\u{1e2}', '\u{0}', '\u{0}']), ('\u{1e5}', ['\u{1e4}', '\u{0}', '\u{0}']), - ('\u{1e7}', ['\u{1e6}', '\u{0}', '\u{0}']), ('\u{1e9}', ['\u{1e8}', '\u{0}', '\u{0}']), - ('\u{1eb}', ['\u{1ea}', '\u{0}', '\u{0}']), ('\u{1ed}', ['\u{1ec}', '\u{0}', '\u{0}']), - ('\u{1ef}', ['\u{1ee}', '\u{0}', '\u{0}']), ('\u{1f0}', ['\u{4a}', '\u{30c}', '\u{0}']), - ('\u{1f2}', ['\u{1f1}', '\u{0}', '\u{0}']), ('\u{1f3}', ['\u{1f1}', '\u{0}', '\u{0}']), - ('\u{1f5}', ['\u{1f4}', '\u{0}', '\u{0}']), ('\u{1f9}', ['\u{1f8}', '\u{0}', '\u{0}']), - ('\u{1fb}', ['\u{1fa}', '\u{0}', '\u{0}']), ('\u{1fd}', ['\u{1fc}', '\u{0}', '\u{0}']), - ('\u{1ff}', ['\u{1fe}', '\u{0}', '\u{0}']), ('\u{201}', ['\u{200}', '\u{0}', '\u{0}']), - ('\u{203}', ['\u{202}', '\u{0}', '\u{0}']), ('\u{205}', ['\u{204}', '\u{0}', '\u{0}']), - ('\u{207}', ['\u{206}', '\u{0}', '\u{0}']), ('\u{209}', ['\u{208}', '\u{0}', '\u{0}']), - ('\u{20b}', ['\u{20a}', '\u{0}', '\u{0}']), ('\u{20d}', ['\u{20c}', '\u{0}', '\u{0}']), - ('\u{20f}', ['\u{20e}', '\u{0}', '\u{0}']), ('\u{211}', ['\u{210}', '\u{0}', '\u{0}']), - ('\u{213}', ['\u{212}', '\u{0}', '\u{0}']), ('\u{215}', ['\u{214}', '\u{0}', '\u{0}']), - ('\u{217}', ['\u{216}', '\u{0}', '\u{0}']), ('\u{219}', ['\u{218}', '\u{0}', '\u{0}']), - ('\u{21b}', ['\u{21a}', '\u{0}', '\u{0}']), ('\u{21d}', ['\u{21c}', '\u{0}', '\u{0}']), - ('\u{21f}', ['\u{21e}', '\u{0}', '\u{0}']), ('\u{223}', ['\u{222}', '\u{0}', '\u{0}']), - ('\u{225}', ['\u{224}', '\u{0}', '\u{0}']), ('\u{227}', ['\u{226}', '\u{0}', '\u{0}']), - ('\u{229}', ['\u{228}', '\u{0}', '\u{0}']), ('\u{22b}', ['\u{22a}', '\u{0}', '\u{0}']), - ('\u{22d}', ['\u{22c}', '\u{0}', '\u{0}']), ('\u{22f}', ['\u{22e}', '\u{0}', '\u{0}']), - ('\u{231}', ['\u{230}', '\u{0}', '\u{0}']), ('\u{233}', ['\u{232}', '\u{0}', '\u{0}']), - ('\u{23c}', ['\u{23b}', '\u{0}', '\u{0}']), ('\u{23f}', ['\u{2c7e}', '\u{0}', '\u{0}']), - ('\u{240}', ['\u{2c7f}', '\u{0}', '\u{0}']), ('\u{242}', ['\u{241}', '\u{0}', '\u{0}']), - ('\u{247}', ['\u{246}', '\u{0}', '\u{0}']), ('\u{249}', ['\u{248}', '\u{0}', '\u{0}']), - ('\u{24b}', ['\u{24a}', '\u{0}', '\u{0}']), ('\u{24d}', ['\u{24c}', '\u{0}', '\u{0}']), - ('\u{24f}', ['\u{24e}', '\u{0}', '\u{0}']), ('\u{250}', ['\u{2c6f}', '\u{0}', '\u{0}']), - ('\u{251}', ['\u{2c6d}', '\u{0}', '\u{0}']), ('\u{252}', ['\u{2c70}', '\u{0}', '\u{0}']), - ('\u{253}', ['\u{181}', '\u{0}', '\u{0}']), ('\u{254}', ['\u{186}', '\u{0}', 
'\u{0}']), - ('\u{256}', ['\u{189}', '\u{0}', '\u{0}']), ('\u{257}', ['\u{18a}', '\u{0}', '\u{0}']), - ('\u{259}', ['\u{18f}', '\u{0}', '\u{0}']), ('\u{25b}', ['\u{190}', '\u{0}', '\u{0}']), - ('\u{25c}', ['\u{a7ab}', '\u{0}', '\u{0}']), ('\u{260}', ['\u{193}', '\u{0}', '\u{0}']), - ('\u{261}', ['\u{a7ac}', '\u{0}', '\u{0}']), ('\u{263}', ['\u{194}', '\u{0}', '\u{0}']), - ('\u{264}', ['\u{a7cb}', '\u{0}', '\u{0}']), ('\u{265}', ['\u{a78d}', '\u{0}', '\u{0}']), - ('\u{266}', ['\u{a7aa}', '\u{0}', '\u{0}']), ('\u{268}', ['\u{197}', '\u{0}', '\u{0}']), - ('\u{269}', ['\u{196}', '\u{0}', '\u{0}']), ('\u{26a}', ['\u{a7ae}', '\u{0}', '\u{0}']), - ('\u{26b}', ['\u{2c62}', '\u{0}', '\u{0}']), ('\u{26c}', ['\u{a7ad}', '\u{0}', '\u{0}']), - ('\u{26f}', ['\u{19c}', '\u{0}', '\u{0}']), ('\u{271}', ['\u{2c6e}', '\u{0}', '\u{0}']), - ('\u{272}', ['\u{19d}', '\u{0}', '\u{0}']), ('\u{275}', ['\u{19f}', '\u{0}', '\u{0}']), - ('\u{27d}', ['\u{2c64}', '\u{0}', '\u{0}']), ('\u{280}', ['\u{1a6}', '\u{0}', '\u{0}']), - ('\u{282}', ['\u{a7c5}', '\u{0}', '\u{0}']), ('\u{283}', ['\u{1a9}', '\u{0}', '\u{0}']), - ('\u{287}', ['\u{a7b1}', '\u{0}', '\u{0}']), ('\u{288}', ['\u{1ae}', '\u{0}', '\u{0}']), - ('\u{289}', ['\u{244}', '\u{0}', '\u{0}']), ('\u{28a}', ['\u{1b1}', '\u{0}', '\u{0}']), - ('\u{28b}', ['\u{1b2}', '\u{0}', '\u{0}']), ('\u{28c}', ['\u{245}', '\u{0}', '\u{0}']), - ('\u{292}', ['\u{1b7}', '\u{0}', '\u{0}']), ('\u{29d}', ['\u{a7b2}', '\u{0}', '\u{0}']), - ('\u{29e}', ['\u{a7b0}', '\u{0}', '\u{0}']), ('\u{345}', ['\u{399}', '\u{0}', '\u{0}']), - ('\u{371}', ['\u{370}', '\u{0}', '\u{0}']), ('\u{373}', ['\u{372}', '\u{0}', '\u{0}']), - ('\u{377}', ['\u{376}', '\u{0}', '\u{0}']), ('\u{37b}', ['\u{3fd}', '\u{0}', '\u{0}']), - ('\u{37c}', ['\u{3fe}', '\u{0}', '\u{0}']), ('\u{37d}', ['\u{3ff}', '\u{0}', '\u{0}']), - ('\u{390}', ['\u{399}', '\u{308}', '\u{301}']), ('\u{3ac}', ['\u{386}', '\u{0}', '\u{0}']), - ('\u{3ad}', ['\u{388}', '\u{0}', '\u{0}']), ('\u{3ae}', ['\u{389}', '\u{0}', '\u{0}']), - ('\u{3af}', ['\u{38a}', '\u{0}', '\u{0}']), ('\u{3b0}', ['\u{3a5}', '\u{308}', '\u{301}']), - ('\u{3b1}', ['\u{391}', '\u{0}', '\u{0}']), ('\u{3b2}', ['\u{392}', '\u{0}', '\u{0}']), - ('\u{3b3}', ['\u{393}', '\u{0}', '\u{0}']), ('\u{3b4}', ['\u{394}', '\u{0}', '\u{0}']), - ('\u{3b5}', ['\u{395}', '\u{0}', '\u{0}']), ('\u{3b6}', ['\u{396}', '\u{0}', '\u{0}']), - ('\u{3b7}', ['\u{397}', '\u{0}', '\u{0}']), ('\u{3b8}', ['\u{398}', '\u{0}', '\u{0}']), - ('\u{3b9}', ['\u{399}', '\u{0}', '\u{0}']), ('\u{3ba}', ['\u{39a}', '\u{0}', '\u{0}']), - ('\u{3bb}', ['\u{39b}', '\u{0}', '\u{0}']), ('\u{3bc}', ['\u{39c}', '\u{0}', '\u{0}']), - ('\u{3bd}', ['\u{39d}', '\u{0}', '\u{0}']), ('\u{3be}', ['\u{39e}', '\u{0}', '\u{0}']), - ('\u{3bf}', ['\u{39f}', '\u{0}', '\u{0}']), ('\u{3c0}', ['\u{3a0}', '\u{0}', '\u{0}']), - ('\u{3c1}', ['\u{3a1}', '\u{0}', '\u{0}']), ('\u{3c2}', ['\u{3a3}', '\u{0}', '\u{0}']), - ('\u{3c3}', ['\u{3a3}', '\u{0}', '\u{0}']), ('\u{3c4}', ['\u{3a4}', '\u{0}', '\u{0}']), - ('\u{3c5}', ['\u{3a5}', '\u{0}', '\u{0}']), ('\u{3c6}', ['\u{3a6}', '\u{0}', '\u{0}']), - ('\u{3c7}', ['\u{3a7}', '\u{0}', '\u{0}']), ('\u{3c8}', ['\u{3a8}', '\u{0}', '\u{0}']), - ('\u{3c9}', ['\u{3a9}', '\u{0}', '\u{0}']), ('\u{3ca}', ['\u{3aa}', '\u{0}', '\u{0}']), - ('\u{3cb}', ['\u{3ab}', '\u{0}', '\u{0}']), ('\u{3cc}', ['\u{38c}', '\u{0}', '\u{0}']), - ('\u{3cd}', ['\u{38e}', '\u{0}', '\u{0}']), ('\u{3ce}', ['\u{38f}', '\u{0}', '\u{0}']), - ('\u{3d0}', ['\u{392}', '\u{0}', '\u{0}']), ('\u{3d1}', ['\u{398}', '\u{0}', '\u{0}']), - 
('\u{3d5}', ['\u{3a6}', '\u{0}', '\u{0}']), ('\u{3d6}', ['\u{3a0}', '\u{0}', '\u{0}']), - ('\u{3d7}', ['\u{3cf}', '\u{0}', '\u{0}']), ('\u{3d9}', ['\u{3d8}', '\u{0}', '\u{0}']), - ('\u{3db}', ['\u{3da}', '\u{0}', '\u{0}']), ('\u{3dd}', ['\u{3dc}', '\u{0}', '\u{0}']), - ('\u{3df}', ['\u{3de}', '\u{0}', '\u{0}']), ('\u{3e1}', ['\u{3e0}', '\u{0}', '\u{0}']), - ('\u{3e3}', ['\u{3e2}', '\u{0}', '\u{0}']), ('\u{3e5}', ['\u{3e4}', '\u{0}', '\u{0}']), - ('\u{3e7}', ['\u{3e6}', '\u{0}', '\u{0}']), ('\u{3e9}', ['\u{3e8}', '\u{0}', '\u{0}']), - ('\u{3eb}', ['\u{3ea}', '\u{0}', '\u{0}']), ('\u{3ed}', ['\u{3ec}', '\u{0}', '\u{0}']), - ('\u{3ef}', ['\u{3ee}', '\u{0}', '\u{0}']), ('\u{3f0}', ['\u{39a}', '\u{0}', '\u{0}']), - ('\u{3f1}', ['\u{3a1}', '\u{0}', '\u{0}']), ('\u{3f2}', ['\u{3f9}', '\u{0}', '\u{0}']), - ('\u{3f3}', ['\u{37f}', '\u{0}', '\u{0}']), ('\u{3f5}', ['\u{395}', '\u{0}', '\u{0}']), - ('\u{3f8}', ['\u{3f7}', '\u{0}', '\u{0}']), ('\u{3fb}', ['\u{3fa}', '\u{0}', '\u{0}']), - ('\u{430}', ['\u{410}', '\u{0}', '\u{0}']), ('\u{431}', ['\u{411}', '\u{0}', '\u{0}']), - ('\u{432}', ['\u{412}', '\u{0}', '\u{0}']), ('\u{433}', ['\u{413}', '\u{0}', '\u{0}']), - ('\u{434}', ['\u{414}', '\u{0}', '\u{0}']), ('\u{435}', ['\u{415}', '\u{0}', '\u{0}']), - ('\u{436}', ['\u{416}', '\u{0}', '\u{0}']), ('\u{437}', ['\u{417}', '\u{0}', '\u{0}']), - ('\u{438}', ['\u{418}', '\u{0}', '\u{0}']), ('\u{439}', ['\u{419}', '\u{0}', '\u{0}']), - ('\u{43a}', ['\u{41a}', '\u{0}', '\u{0}']), ('\u{43b}', ['\u{41b}', '\u{0}', '\u{0}']), - ('\u{43c}', ['\u{41c}', '\u{0}', '\u{0}']), ('\u{43d}', ['\u{41d}', '\u{0}', '\u{0}']), - ('\u{43e}', ['\u{41e}', '\u{0}', '\u{0}']), ('\u{43f}', ['\u{41f}', '\u{0}', '\u{0}']), - ('\u{440}', ['\u{420}', '\u{0}', '\u{0}']), ('\u{441}', ['\u{421}', '\u{0}', '\u{0}']), - ('\u{442}', ['\u{422}', '\u{0}', '\u{0}']), ('\u{443}', ['\u{423}', '\u{0}', '\u{0}']), - ('\u{444}', ['\u{424}', '\u{0}', '\u{0}']), ('\u{445}', ['\u{425}', '\u{0}', '\u{0}']), - ('\u{446}', ['\u{426}', '\u{0}', '\u{0}']), ('\u{447}', ['\u{427}', '\u{0}', '\u{0}']), - ('\u{448}', ['\u{428}', '\u{0}', '\u{0}']), ('\u{449}', ['\u{429}', '\u{0}', '\u{0}']), - ('\u{44a}', ['\u{42a}', '\u{0}', '\u{0}']), ('\u{44b}', ['\u{42b}', '\u{0}', '\u{0}']), - ('\u{44c}', ['\u{42c}', '\u{0}', '\u{0}']), ('\u{44d}', ['\u{42d}', '\u{0}', '\u{0}']), - ('\u{44e}', ['\u{42e}', '\u{0}', '\u{0}']), ('\u{44f}', ['\u{42f}', '\u{0}', '\u{0}']), - ('\u{450}', ['\u{400}', '\u{0}', '\u{0}']), ('\u{451}', ['\u{401}', '\u{0}', '\u{0}']), - ('\u{452}', ['\u{402}', '\u{0}', '\u{0}']), ('\u{453}', ['\u{403}', '\u{0}', '\u{0}']), - ('\u{454}', ['\u{404}', '\u{0}', '\u{0}']), ('\u{455}', ['\u{405}', '\u{0}', '\u{0}']), - ('\u{456}', ['\u{406}', '\u{0}', '\u{0}']), ('\u{457}', ['\u{407}', '\u{0}', '\u{0}']), - ('\u{458}', ['\u{408}', '\u{0}', '\u{0}']), ('\u{459}', ['\u{409}', '\u{0}', '\u{0}']), - ('\u{45a}', ['\u{40a}', '\u{0}', '\u{0}']), ('\u{45b}', ['\u{40b}', '\u{0}', '\u{0}']), - ('\u{45c}', ['\u{40c}', '\u{0}', '\u{0}']), ('\u{45d}', ['\u{40d}', '\u{0}', '\u{0}']), - ('\u{45e}', ['\u{40e}', '\u{0}', '\u{0}']), ('\u{45f}', ['\u{40f}', '\u{0}', '\u{0}']), - ('\u{461}', ['\u{460}', '\u{0}', '\u{0}']), ('\u{463}', ['\u{462}', '\u{0}', '\u{0}']), - ('\u{465}', ['\u{464}', '\u{0}', '\u{0}']), ('\u{467}', ['\u{466}', '\u{0}', '\u{0}']), - ('\u{469}', ['\u{468}', '\u{0}', '\u{0}']), ('\u{46b}', ['\u{46a}', '\u{0}', '\u{0}']), - ('\u{46d}', ['\u{46c}', '\u{0}', '\u{0}']), ('\u{46f}', ['\u{46e}', '\u{0}', '\u{0}']), - ('\u{471}', ['\u{470}', '\u{0}', '\u{0}']), 
('\u{473}', ['\u{472}', '\u{0}', '\u{0}']), - ('\u{475}', ['\u{474}', '\u{0}', '\u{0}']), ('\u{477}', ['\u{476}', '\u{0}', '\u{0}']), - ('\u{479}', ['\u{478}', '\u{0}', '\u{0}']), ('\u{47b}', ['\u{47a}', '\u{0}', '\u{0}']), - ('\u{47d}', ['\u{47c}', '\u{0}', '\u{0}']), ('\u{47f}', ['\u{47e}', '\u{0}', '\u{0}']), - ('\u{481}', ['\u{480}', '\u{0}', '\u{0}']), ('\u{48b}', ['\u{48a}', '\u{0}', '\u{0}']), - ('\u{48d}', ['\u{48c}', '\u{0}', '\u{0}']), ('\u{48f}', ['\u{48e}', '\u{0}', '\u{0}']), - ('\u{491}', ['\u{490}', '\u{0}', '\u{0}']), ('\u{493}', ['\u{492}', '\u{0}', '\u{0}']), - ('\u{495}', ['\u{494}', '\u{0}', '\u{0}']), ('\u{497}', ['\u{496}', '\u{0}', '\u{0}']), - ('\u{499}', ['\u{498}', '\u{0}', '\u{0}']), ('\u{49b}', ['\u{49a}', '\u{0}', '\u{0}']), - ('\u{49d}', ['\u{49c}', '\u{0}', '\u{0}']), ('\u{49f}', ['\u{49e}', '\u{0}', '\u{0}']), - ('\u{4a1}', ['\u{4a0}', '\u{0}', '\u{0}']), ('\u{4a3}', ['\u{4a2}', '\u{0}', '\u{0}']), - ('\u{4a5}', ['\u{4a4}', '\u{0}', '\u{0}']), ('\u{4a7}', ['\u{4a6}', '\u{0}', '\u{0}']), - ('\u{4a9}', ['\u{4a8}', '\u{0}', '\u{0}']), ('\u{4ab}', ['\u{4aa}', '\u{0}', '\u{0}']), - ('\u{4ad}', ['\u{4ac}', '\u{0}', '\u{0}']), ('\u{4af}', ['\u{4ae}', '\u{0}', '\u{0}']), - ('\u{4b1}', ['\u{4b0}', '\u{0}', '\u{0}']), ('\u{4b3}', ['\u{4b2}', '\u{0}', '\u{0}']), - ('\u{4b5}', ['\u{4b4}', '\u{0}', '\u{0}']), ('\u{4b7}', ['\u{4b6}', '\u{0}', '\u{0}']), - ('\u{4b9}', ['\u{4b8}', '\u{0}', '\u{0}']), ('\u{4bb}', ['\u{4ba}', '\u{0}', '\u{0}']), - ('\u{4bd}', ['\u{4bc}', '\u{0}', '\u{0}']), ('\u{4bf}', ['\u{4be}', '\u{0}', '\u{0}']), - ('\u{4c2}', ['\u{4c1}', '\u{0}', '\u{0}']), ('\u{4c4}', ['\u{4c3}', '\u{0}', '\u{0}']), - ('\u{4c6}', ['\u{4c5}', '\u{0}', '\u{0}']), ('\u{4c8}', ['\u{4c7}', '\u{0}', '\u{0}']), - ('\u{4ca}', ['\u{4c9}', '\u{0}', '\u{0}']), ('\u{4cc}', ['\u{4cb}', '\u{0}', '\u{0}']), - ('\u{4ce}', ['\u{4cd}', '\u{0}', '\u{0}']), ('\u{4cf}', ['\u{4c0}', '\u{0}', '\u{0}']), - ('\u{4d1}', ['\u{4d0}', '\u{0}', '\u{0}']), ('\u{4d3}', ['\u{4d2}', '\u{0}', '\u{0}']), - ('\u{4d5}', ['\u{4d4}', '\u{0}', '\u{0}']), ('\u{4d7}', ['\u{4d6}', '\u{0}', '\u{0}']), - ('\u{4d9}', ['\u{4d8}', '\u{0}', '\u{0}']), ('\u{4db}', ['\u{4da}', '\u{0}', '\u{0}']), - ('\u{4dd}', ['\u{4dc}', '\u{0}', '\u{0}']), ('\u{4df}', ['\u{4de}', '\u{0}', '\u{0}']), - ('\u{4e1}', ['\u{4e0}', '\u{0}', '\u{0}']), ('\u{4e3}', ['\u{4e2}', '\u{0}', '\u{0}']), - ('\u{4e5}', ['\u{4e4}', '\u{0}', '\u{0}']), ('\u{4e7}', ['\u{4e6}', '\u{0}', '\u{0}']), - ('\u{4e9}', ['\u{4e8}', '\u{0}', '\u{0}']), ('\u{4eb}', ['\u{4ea}', '\u{0}', '\u{0}']), - ('\u{4ed}', ['\u{4ec}', '\u{0}', '\u{0}']), ('\u{4ef}', ['\u{4ee}', '\u{0}', '\u{0}']), - ('\u{4f1}', ['\u{4f0}', '\u{0}', '\u{0}']), ('\u{4f3}', ['\u{4f2}', '\u{0}', '\u{0}']), - ('\u{4f5}', ['\u{4f4}', '\u{0}', '\u{0}']), ('\u{4f7}', ['\u{4f6}', '\u{0}', '\u{0}']), - ('\u{4f9}', ['\u{4f8}', '\u{0}', '\u{0}']), ('\u{4fb}', ['\u{4fa}', '\u{0}', '\u{0}']), - ('\u{4fd}', ['\u{4fc}', '\u{0}', '\u{0}']), ('\u{4ff}', ['\u{4fe}', '\u{0}', '\u{0}']), - ('\u{501}', ['\u{500}', '\u{0}', '\u{0}']), ('\u{503}', ['\u{502}', '\u{0}', '\u{0}']), - ('\u{505}', ['\u{504}', '\u{0}', '\u{0}']), ('\u{507}', ['\u{506}', '\u{0}', '\u{0}']), - ('\u{509}', ['\u{508}', '\u{0}', '\u{0}']), ('\u{50b}', ['\u{50a}', '\u{0}', '\u{0}']), - ('\u{50d}', ['\u{50c}', '\u{0}', '\u{0}']), ('\u{50f}', ['\u{50e}', '\u{0}', '\u{0}']), - ('\u{511}', ['\u{510}', '\u{0}', '\u{0}']), ('\u{513}', ['\u{512}', '\u{0}', '\u{0}']), - ('\u{515}', ['\u{514}', '\u{0}', '\u{0}']), ('\u{517}', ['\u{516}', '\u{0}', '\u{0}']), 
- ('\u{519}', ['\u{518}', '\u{0}', '\u{0}']), ('\u{51b}', ['\u{51a}', '\u{0}', '\u{0}']), - ('\u{51d}', ['\u{51c}', '\u{0}', '\u{0}']), ('\u{51f}', ['\u{51e}', '\u{0}', '\u{0}']), - ('\u{521}', ['\u{520}', '\u{0}', '\u{0}']), ('\u{523}', ['\u{522}', '\u{0}', '\u{0}']), - ('\u{525}', ['\u{524}', '\u{0}', '\u{0}']), ('\u{527}', ['\u{526}', '\u{0}', '\u{0}']), - ('\u{529}', ['\u{528}', '\u{0}', '\u{0}']), ('\u{52b}', ['\u{52a}', '\u{0}', '\u{0}']), - ('\u{52d}', ['\u{52c}', '\u{0}', '\u{0}']), ('\u{52f}', ['\u{52e}', '\u{0}', '\u{0}']), - ('\u{561}', ['\u{531}', '\u{0}', '\u{0}']), ('\u{562}', ['\u{532}', '\u{0}', '\u{0}']), - ('\u{563}', ['\u{533}', '\u{0}', '\u{0}']), ('\u{564}', ['\u{534}', '\u{0}', '\u{0}']), - ('\u{565}', ['\u{535}', '\u{0}', '\u{0}']), ('\u{566}', ['\u{536}', '\u{0}', '\u{0}']), - ('\u{567}', ['\u{537}', '\u{0}', '\u{0}']), ('\u{568}', ['\u{538}', '\u{0}', '\u{0}']), - ('\u{569}', ['\u{539}', '\u{0}', '\u{0}']), ('\u{56a}', ['\u{53a}', '\u{0}', '\u{0}']), - ('\u{56b}', ['\u{53b}', '\u{0}', '\u{0}']), ('\u{56c}', ['\u{53c}', '\u{0}', '\u{0}']), - ('\u{56d}', ['\u{53d}', '\u{0}', '\u{0}']), ('\u{56e}', ['\u{53e}', '\u{0}', '\u{0}']), - ('\u{56f}', ['\u{53f}', '\u{0}', '\u{0}']), ('\u{570}', ['\u{540}', '\u{0}', '\u{0}']), - ('\u{571}', ['\u{541}', '\u{0}', '\u{0}']), ('\u{572}', ['\u{542}', '\u{0}', '\u{0}']), - ('\u{573}', ['\u{543}', '\u{0}', '\u{0}']), ('\u{574}', ['\u{544}', '\u{0}', '\u{0}']), - ('\u{575}', ['\u{545}', '\u{0}', '\u{0}']), ('\u{576}', ['\u{546}', '\u{0}', '\u{0}']), - ('\u{577}', ['\u{547}', '\u{0}', '\u{0}']), ('\u{578}', ['\u{548}', '\u{0}', '\u{0}']), - ('\u{579}', ['\u{549}', '\u{0}', '\u{0}']), ('\u{57a}', ['\u{54a}', '\u{0}', '\u{0}']), - ('\u{57b}', ['\u{54b}', '\u{0}', '\u{0}']), ('\u{57c}', ['\u{54c}', '\u{0}', '\u{0}']), - ('\u{57d}', ['\u{54d}', '\u{0}', '\u{0}']), ('\u{57e}', ['\u{54e}', '\u{0}', '\u{0}']), - ('\u{57f}', ['\u{54f}', '\u{0}', '\u{0}']), ('\u{580}', ['\u{550}', '\u{0}', '\u{0}']), - ('\u{581}', ['\u{551}', '\u{0}', '\u{0}']), ('\u{582}', ['\u{552}', '\u{0}', '\u{0}']), - ('\u{583}', ['\u{553}', '\u{0}', '\u{0}']), ('\u{584}', ['\u{554}', '\u{0}', '\u{0}']), - ('\u{585}', ['\u{555}', '\u{0}', '\u{0}']), ('\u{586}', ['\u{556}', '\u{0}', '\u{0}']), - ('\u{587}', ['\u{535}', '\u{552}', '\u{0}']), ('\u{10d0}', ['\u{1c90}', '\u{0}', '\u{0}']), - ('\u{10d1}', ['\u{1c91}', '\u{0}', '\u{0}']), ('\u{10d2}', ['\u{1c92}', '\u{0}', '\u{0}']), - ('\u{10d3}', ['\u{1c93}', '\u{0}', '\u{0}']), ('\u{10d4}', ['\u{1c94}', '\u{0}', '\u{0}']), - ('\u{10d5}', ['\u{1c95}', '\u{0}', '\u{0}']), ('\u{10d6}', ['\u{1c96}', '\u{0}', '\u{0}']), - ('\u{10d7}', ['\u{1c97}', '\u{0}', '\u{0}']), ('\u{10d8}', ['\u{1c98}', '\u{0}', '\u{0}']), - ('\u{10d9}', ['\u{1c99}', '\u{0}', '\u{0}']), ('\u{10da}', ['\u{1c9a}', '\u{0}', '\u{0}']), - ('\u{10db}', ['\u{1c9b}', '\u{0}', '\u{0}']), ('\u{10dc}', ['\u{1c9c}', '\u{0}', '\u{0}']), - ('\u{10dd}', ['\u{1c9d}', '\u{0}', '\u{0}']), ('\u{10de}', ['\u{1c9e}', '\u{0}', '\u{0}']), - ('\u{10df}', ['\u{1c9f}', '\u{0}', '\u{0}']), ('\u{10e0}', ['\u{1ca0}', '\u{0}', '\u{0}']), - ('\u{10e1}', ['\u{1ca1}', '\u{0}', '\u{0}']), ('\u{10e2}', ['\u{1ca2}', '\u{0}', '\u{0}']), - ('\u{10e3}', ['\u{1ca3}', '\u{0}', '\u{0}']), ('\u{10e4}', ['\u{1ca4}', '\u{0}', '\u{0}']), - ('\u{10e5}', ['\u{1ca5}', '\u{0}', '\u{0}']), ('\u{10e6}', ['\u{1ca6}', '\u{0}', '\u{0}']), - ('\u{10e7}', ['\u{1ca7}', '\u{0}', '\u{0}']), ('\u{10e8}', ['\u{1ca8}', '\u{0}', '\u{0}']), - ('\u{10e9}', ['\u{1ca9}', '\u{0}', '\u{0}']), ('\u{10ea}', ['\u{1caa}', '\u{0}', 
'\u{0}']), - ('\u{10eb}', ['\u{1cab}', '\u{0}', '\u{0}']), ('\u{10ec}', ['\u{1cac}', '\u{0}', '\u{0}']), - ('\u{10ed}', ['\u{1cad}', '\u{0}', '\u{0}']), ('\u{10ee}', ['\u{1cae}', '\u{0}', '\u{0}']), - ('\u{10ef}', ['\u{1caf}', '\u{0}', '\u{0}']), ('\u{10f0}', ['\u{1cb0}', '\u{0}', '\u{0}']), - ('\u{10f1}', ['\u{1cb1}', '\u{0}', '\u{0}']), ('\u{10f2}', ['\u{1cb2}', '\u{0}', '\u{0}']), - ('\u{10f3}', ['\u{1cb3}', '\u{0}', '\u{0}']), ('\u{10f4}', ['\u{1cb4}', '\u{0}', '\u{0}']), - ('\u{10f5}', ['\u{1cb5}', '\u{0}', '\u{0}']), ('\u{10f6}', ['\u{1cb6}', '\u{0}', '\u{0}']), - ('\u{10f7}', ['\u{1cb7}', '\u{0}', '\u{0}']), ('\u{10f8}', ['\u{1cb8}', '\u{0}', '\u{0}']), - ('\u{10f9}', ['\u{1cb9}', '\u{0}', '\u{0}']), ('\u{10fa}', ['\u{1cba}', '\u{0}', '\u{0}']), - ('\u{10fd}', ['\u{1cbd}', '\u{0}', '\u{0}']), ('\u{10fe}', ['\u{1cbe}', '\u{0}', '\u{0}']), - ('\u{10ff}', ['\u{1cbf}', '\u{0}', '\u{0}']), ('\u{13f8}', ['\u{13f0}', '\u{0}', '\u{0}']), - ('\u{13f9}', ['\u{13f1}', '\u{0}', '\u{0}']), ('\u{13fa}', ['\u{13f2}', '\u{0}', '\u{0}']), - ('\u{13fb}', ['\u{13f3}', '\u{0}', '\u{0}']), ('\u{13fc}', ['\u{13f4}', '\u{0}', '\u{0}']), - ('\u{13fd}', ['\u{13f5}', '\u{0}', '\u{0}']), ('\u{1c80}', ['\u{412}', '\u{0}', '\u{0}']), - ('\u{1c81}', ['\u{414}', '\u{0}', '\u{0}']), ('\u{1c82}', ['\u{41e}', '\u{0}', '\u{0}']), - ('\u{1c83}', ['\u{421}', '\u{0}', '\u{0}']), ('\u{1c84}', ['\u{422}', '\u{0}', '\u{0}']), - ('\u{1c85}', ['\u{422}', '\u{0}', '\u{0}']), ('\u{1c86}', ['\u{42a}', '\u{0}', '\u{0}']), - ('\u{1c87}', ['\u{462}', '\u{0}', '\u{0}']), ('\u{1c88}', ['\u{a64a}', '\u{0}', '\u{0}']), - ('\u{1c8a}', ['\u{1c89}', '\u{0}', '\u{0}']), ('\u{1d79}', ['\u{a77d}', '\u{0}', '\u{0}']), - ('\u{1d7d}', ['\u{2c63}', '\u{0}', '\u{0}']), ('\u{1d8e}', ['\u{a7c6}', '\u{0}', '\u{0}']), - ('\u{1e01}', ['\u{1e00}', '\u{0}', '\u{0}']), ('\u{1e03}', ['\u{1e02}', '\u{0}', '\u{0}']), - ('\u{1e05}', ['\u{1e04}', '\u{0}', '\u{0}']), ('\u{1e07}', ['\u{1e06}', '\u{0}', '\u{0}']), - ('\u{1e09}', ['\u{1e08}', '\u{0}', '\u{0}']), ('\u{1e0b}', ['\u{1e0a}', '\u{0}', '\u{0}']), - ('\u{1e0d}', ['\u{1e0c}', '\u{0}', '\u{0}']), ('\u{1e0f}', ['\u{1e0e}', '\u{0}', '\u{0}']), - ('\u{1e11}', ['\u{1e10}', '\u{0}', '\u{0}']), ('\u{1e13}', ['\u{1e12}', '\u{0}', '\u{0}']), - ('\u{1e15}', ['\u{1e14}', '\u{0}', '\u{0}']), ('\u{1e17}', ['\u{1e16}', '\u{0}', '\u{0}']), - ('\u{1e19}', ['\u{1e18}', '\u{0}', '\u{0}']), ('\u{1e1b}', ['\u{1e1a}', '\u{0}', '\u{0}']), - ('\u{1e1d}', ['\u{1e1c}', '\u{0}', '\u{0}']), ('\u{1e1f}', ['\u{1e1e}', '\u{0}', '\u{0}']), - ('\u{1e21}', ['\u{1e20}', '\u{0}', '\u{0}']), ('\u{1e23}', ['\u{1e22}', '\u{0}', '\u{0}']), - ('\u{1e25}', ['\u{1e24}', '\u{0}', '\u{0}']), ('\u{1e27}', ['\u{1e26}', '\u{0}', '\u{0}']), - ('\u{1e29}', ['\u{1e28}', '\u{0}', '\u{0}']), ('\u{1e2b}', ['\u{1e2a}', '\u{0}', '\u{0}']), - ('\u{1e2d}', ['\u{1e2c}', '\u{0}', '\u{0}']), ('\u{1e2f}', ['\u{1e2e}', '\u{0}', '\u{0}']), - ('\u{1e31}', ['\u{1e30}', '\u{0}', '\u{0}']), ('\u{1e33}', ['\u{1e32}', '\u{0}', '\u{0}']), - ('\u{1e35}', ['\u{1e34}', '\u{0}', '\u{0}']), ('\u{1e37}', ['\u{1e36}', '\u{0}', '\u{0}']), - ('\u{1e39}', ['\u{1e38}', '\u{0}', '\u{0}']), ('\u{1e3b}', ['\u{1e3a}', '\u{0}', '\u{0}']), - ('\u{1e3d}', ['\u{1e3c}', '\u{0}', '\u{0}']), ('\u{1e3f}', ['\u{1e3e}', '\u{0}', '\u{0}']), - ('\u{1e41}', ['\u{1e40}', '\u{0}', '\u{0}']), ('\u{1e43}', ['\u{1e42}', '\u{0}', '\u{0}']), - ('\u{1e45}', ['\u{1e44}', '\u{0}', '\u{0}']), ('\u{1e47}', ['\u{1e46}', '\u{0}', '\u{0}']), - ('\u{1e49}', ['\u{1e48}', '\u{0}', '\u{0}']), ('\u{1e4b}', ['\u{1e4a}', 
'\u{0}', '\u{0}']), - ('\u{1e4d}', ['\u{1e4c}', '\u{0}', '\u{0}']), ('\u{1e4f}', ['\u{1e4e}', '\u{0}', '\u{0}']), - ('\u{1e51}', ['\u{1e50}', '\u{0}', '\u{0}']), ('\u{1e53}', ['\u{1e52}', '\u{0}', '\u{0}']), - ('\u{1e55}', ['\u{1e54}', '\u{0}', '\u{0}']), ('\u{1e57}', ['\u{1e56}', '\u{0}', '\u{0}']), - ('\u{1e59}', ['\u{1e58}', '\u{0}', '\u{0}']), ('\u{1e5b}', ['\u{1e5a}', '\u{0}', '\u{0}']), - ('\u{1e5d}', ['\u{1e5c}', '\u{0}', '\u{0}']), ('\u{1e5f}', ['\u{1e5e}', '\u{0}', '\u{0}']), - ('\u{1e61}', ['\u{1e60}', '\u{0}', '\u{0}']), ('\u{1e63}', ['\u{1e62}', '\u{0}', '\u{0}']), - ('\u{1e65}', ['\u{1e64}', '\u{0}', '\u{0}']), ('\u{1e67}', ['\u{1e66}', '\u{0}', '\u{0}']), - ('\u{1e69}', ['\u{1e68}', '\u{0}', '\u{0}']), ('\u{1e6b}', ['\u{1e6a}', '\u{0}', '\u{0}']), - ('\u{1e6d}', ['\u{1e6c}', '\u{0}', '\u{0}']), ('\u{1e6f}', ['\u{1e6e}', '\u{0}', '\u{0}']), - ('\u{1e71}', ['\u{1e70}', '\u{0}', '\u{0}']), ('\u{1e73}', ['\u{1e72}', '\u{0}', '\u{0}']), - ('\u{1e75}', ['\u{1e74}', '\u{0}', '\u{0}']), ('\u{1e77}', ['\u{1e76}', '\u{0}', '\u{0}']), - ('\u{1e79}', ['\u{1e78}', '\u{0}', '\u{0}']), ('\u{1e7b}', ['\u{1e7a}', '\u{0}', '\u{0}']), - ('\u{1e7d}', ['\u{1e7c}', '\u{0}', '\u{0}']), ('\u{1e7f}', ['\u{1e7e}', '\u{0}', '\u{0}']), - ('\u{1e81}', ['\u{1e80}', '\u{0}', '\u{0}']), ('\u{1e83}', ['\u{1e82}', '\u{0}', '\u{0}']), - ('\u{1e85}', ['\u{1e84}', '\u{0}', '\u{0}']), ('\u{1e87}', ['\u{1e86}', '\u{0}', '\u{0}']), - ('\u{1e89}', ['\u{1e88}', '\u{0}', '\u{0}']), ('\u{1e8b}', ['\u{1e8a}', '\u{0}', '\u{0}']), - ('\u{1e8d}', ['\u{1e8c}', '\u{0}', '\u{0}']), ('\u{1e8f}', ['\u{1e8e}', '\u{0}', '\u{0}']), - ('\u{1e91}', ['\u{1e90}', '\u{0}', '\u{0}']), ('\u{1e93}', ['\u{1e92}', '\u{0}', '\u{0}']), - ('\u{1e95}', ['\u{1e94}', '\u{0}', '\u{0}']), ('\u{1e96}', ['\u{48}', '\u{331}', '\u{0}']), - ('\u{1e97}', ['\u{54}', '\u{308}', '\u{0}']), ('\u{1e98}', ['\u{57}', '\u{30a}', '\u{0}']), - ('\u{1e99}', ['\u{59}', '\u{30a}', '\u{0}']), ('\u{1e9a}', ['\u{41}', '\u{2be}', '\u{0}']), - ('\u{1e9b}', ['\u{1e60}', '\u{0}', '\u{0}']), ('\u{1ea1}', ['\u{1ea0}', '\u{0}', '\u{0}']), - ('\u{1ea3}', ['\u{1ea2}', '\u{0}', '\u{0}']), ('\u{1ea5}', ['\u{1ea4}', '\u{0}', '\u{0}']), - ('\u{1ea7}', ['\u{1ea6}', '\u{0}', '\u{0}']), ('\u{1ea9}', ['\u{1ea8}', '\u{0}', '\u{0}']), - ('\u{1eab}', ['\u{1eaa}', '\u{0}', '\u{0}']), ('\u{1ead}', ['\u{1eac}', '\u{0}', '\u{0}']), - ('\u{1eaf}', ['\u{1eae}', '\u{0}', '\u{0}']), ('\u{1eb1}', ['\u{1eb0}', '\u{0}', '\u{0}']), - ('\u{1eb3}', ['\u{1eb2}', '\u{0}', '\u{0}']), ('\u{1eb5}', ['\u{1eb4}', '\u{0}', '\u{0}']), - ('\u{1eb7}', ['\u{1eb6}', '\u{0}', '\u{0}']), ('\u{1eb9}', ['\u{1eb8}', '\u{0}', '\u{0}']), - ('\u{1ebb}', ['\u{1eba}', '\u{0}', '\u{0}']), ('\u{1ebd}', ['\u{1ebc}', '\u{0}', '\u{0}']), - ('\u{1ebf}', ['\u{1ebe}', '\u{0}', '\u{0}']), ('\u{1ec1}', ['\u{1ec0}', '\u{0}', '\u{0}']), - ('\u{1ec3}', ['\u{1ec2}', '\u{0}', '\u{0}']), ('\u{1ec5}', ['\u{1ec4}', '\u{0}', '\u{0}']), - ('\u{1ec7}', ['\u{1ec6}', '\u{0}', '\u{0}']), ('\u{1ec9}', ['\u{1ec8}', '\u{0}', '\u{0}']), - ('\u{1ecb}', ['\u{1eca}', '\u{0}', '\u{0}']), ('\u{1ecd}', ['\u{1ecc}', '\u{0}', '\u{0}']), - ('\u{1ecf}', ['\u{1ece}', '\u{0}', '\u{0}']), ('\u{1ed1}', ['\u{1ed0}', '\u{0}', '\u{0}']), - ('\u{1ed3}', ['\u{1ed2}', '\u{0}', '\u{0}']), ('\u{1ed5}', ['\u{1ed4}', '\u{0}', '\u{0}']), - ('\u{1ed7}', ['\u{1ed6}', '\u{0}', '\u{0}']), ('\u{1ed9}', ['\u{1ed8}', '\u{0}', '\u{0}']), - ('\u{1edb}', ['\u{1eda}', '\u{0}', '\u{0}']), ('\u{1edd}', ['\u{1edc}', '\u{0}', '\u{0}']), - ('\u{1edf}', ['\u{1ede}', '\u{0}', '\u{0}']), 
('\u{1ee1}', ['\u{1ee0}', '\u{0}', '\u{0}']), - ('\u{1ee3}', ['\u{1ee2}', '\u{0}', '\u{0}']), ('\u{1ee5}', ['\u{1ee4}', '\u{0}', '\u{0}']), - ('\u{1ee7}', ['\u{1ee6}', '\u{0}', '\u{0}']), ('\u{1ee9}', ['\u{1ee8}', '\u{0}', '\u{0}']), - ('\u{1eeb}', ['\u{1eea}', '\u{0}', '\u{0}']), ('\u{1eed}', ['\u{1eec}', '\u{0}', '\u{0}']), - ('\u{1eef}', ['\u{1eee}', '\u{0}', '\u{0}']), ('\u{1ef1}', ['\u{1ef0}', '\u{0}', '\u{0}']), - ('\u{1ef3}', ['\u{1ef2}', '\u{0}', '\u{0}']), ('\u{1ef5}', ['\u{1ef4}', '\u{0}', '\u{0}']), - ('\u{1ef7}', ['\u{1ef6}', '\u{0}', '\u{0}']), ('\u{1ef9}', ['\u{1ef8}', '\u{0}', '\u{0}']), - ('\u{1efb}', ['\u{1efa}', '\u{0}', '\u{0}']), ('\u{1efd}', ['\u{1efc}', '\u{0}', '\u{0}']), - ('\u{1eff}', ['\u{1efe}', '\u{0}', '\u{0}']), ('\u{1f00}', ['\u{1f08}', '\u{0}', '\u{0}']), - ('\u{1f01}', ['\u{1f09}', '\u{0}', '\u{0}']), ('\u{1f02}', ['\u{1f0a}', '\u{0}', '\u{0}']), - ('\u{1f03}', ['\u{1f0b}', '\u{0}', '\u{0}']), ('\u{1f04}', ['\u{1f0c}', '\u{0}', '\u{0}']), - ('\u{1f05}', ['\u{1f0d}', '\u{0}', '\u{0}']), ('\u{1f06}', ['\u{1f0e}', '\u{0}', '\u{0}']), - ('\u{1f07}', ['\u{1f0f}', '\u{0}', '\u{0}']), ('\u{1f10}', ['\u{1f18}', '\u{0}', '\u{0}']), - ('\u{1f11}', ['\u{1f19}', '\u{0}', '\u{0}']), ('\u{1f12}', ['\u{1f1a}', '\u{0}', '\u{0}']), - ('\u{1f13}', ['\u{1f1b}', '\u{0}', '\u{0}']), ('\u{1f14}', ['\u{1f1c}', '\u{0}', '\u{0}']), - ('\u{1f15}', ['\u{1f1d}', '\u{0}', '\u{0}']), ('\u{1f20}', ['\u{1f28}', '\u{0}', '\u{0}']), - ('\u{1f21}', ['\u{1f29}', '\u{0}', '\u{0}']), ('\u{1f22}', ['\u{1f2a}', '\u{0}', '\u{0}']), - ('\u{1f23}', ['\u{1f2b}', '\u{0}', '\u{0}']), ('\u{1f24}', ['\u{1f2c}', '\u{0}', '\u{0}']), - ('\u{1f25}', ['\u{1f2d}', '\u{0}', '\u{0}']), ('\u{1f26}', ['\u{1f2e}', '\u{0}', '\u{0}']), - ('\u{1f27}', ['\u{1f2f}', '\u{0}', '\u{0}']), ('\u{1f30}', ['\u{1f38}', '\u{0}', '\u{0}']), - ('\u{1f31}', ['\u{1f39}', '\u{0}', '\u{0}']), ('\u{1f32}', ['\u{1f3a}', '\u{0}', '\u{0}']), - ('\u{1f33}', ['\u{1f3b}', '\u{0}', '\u{0}']), ('\u{1f34}', ['\u{1f3c}', '\u{0}', '\u{0}']), - ('\u{1f35}', ['\u{1f3d}', '\u{0}', '\u{0}']), ('\u{1f36}', ['\u{1f3e}', '\u{0}', '\u{0}']), - ('\u{1f37}', ['\u{1f3f}', '\u{0}', '\u{0}']), ('\u{1f40}', ['\u{1f48}', '\u{0}', '\u{0}']), - ('\u{1f41}', ['\u{1f49}', '\u{0}', '\u{0}']), ('\u{1f42}', ['\u{1f4a}', '\u{0}', '\u{0}']), - ('\u{1f43}', ['\u{1f4b}', '\u{0}', '\u{0}']), ('\u{1f44}', ['\u{1f4c}', '\u{0}', '\u{0}']), - ('\u{1f45}', ['\u{1f4d}', '\u{0}', '\u{0}']), ('\u{1f50}', ['\u{3a5}', '\u{313}', '\u{0}']), - ('\u{1f51}', ['\u{1f59}', '\u{0}', '\u{0}']), - ('\u{1f52}', ['\u{3a5}', '\u{313}', '\u{300}']), - ('\u{1f53}', ['\u{1f5b}', '\u{0}', '\u{0}']), - ('\u{1f54}', ['\u{3a5}', '\u{313}', '\u{301}']), - ('\u{1f55}', ['\u{1f5d}', '\u{0}', '\u{0}']), - ('\u{1f56}', ['\u{3a5}', '\u{313}', '\u{342}']), - ('\u{1f57}', ['\u{1f5f}', '\u{0}', '\u{0}']), ('\u{1f60}', ['\u{1f68}', '\u{0}', '\u{0}']), - ('\u{1f61}', ['\u{1f69}', '\u{0}', '\u{0}']), ('\u{1f62}', ['\u{1f6a}', '\u{0}', '\u{0}']), - ('\u{1f63}', ['\u{1f6b}', '\u{0}', '\u{0}']), ('\u{1f64}', ['\u{1f6c}', '\u{0}', '\u{0}']), - ('\u{1f65}', ['\u{1f6d}', '\u{0}', '\u{0}']), ('\u{1f66}', ['\u{1f6e}', '\u{0}', '\u{0}']), - ('\u{1f67}', ['\u{1f6f}', '\u{0}', '\u{0}']), ('\u{1f70}', ['\u{1fba}', '\u{0}', '\u{0}']), - ('\u{1f71}', ['\u{1fbb}', '\u{0}', '\u{0}']), ('\u{1f72}', ['\u{1fc8}', '\u{0}', '\u{0}']), - ('\u{1f73}', ['\u{1fc9}', '\u{0}', '\u{0}']), ('\u{1f74}', ['\u{1fca}', '\u{0}', '\u{0}']), - ('\u{1f75}', ['\u{1fcb}', '\u{0}', '\u{0}']), ('\u{1f76}', ['\u{1fda}', '\u{0}', '\u{0}']), - ('\u{1f77}', 
['\u{1fdb}', '\u{0}', '\u{0}']), ('\u{1f78}', ['\u{1ff8}', '\u{0}', '\u{0}']), - ('\u{1f79}', ['\u{1ff9}', '\u{0}', '\u{0}']), ('\u{1f7a}', ['\u{1fea}', '\u{0}', '\u{0}']), - ('\u{1f7b}', ['\u{1feb}', '\u{0}', '\u{0}']), ('\u{1f7c}', ['\u{1ffa}', '\u{0}', '\u{0}']), - ('\u{1f7d}', ['\u{1ffb}', '\u{0}', '\u{0}']), - ('\u{1f80}', ['\u{1f08}', '\u{399}', '\u{0}']), - ('\u{1f81}', ['\u{1f09}', '\u{399}', '\u{0}']), - ('\u{1f82}', ['\u{1f0a}', '\u{399}', '\u{0}']), - ('\u{1f83}', ['\u{1f0b}', '\u{399}', '\u{0}']), - ('\u{1f84}', ['\u{1f0c}', '\u{399}', '\u{0}']), - ('\u{1f85}', ['\u{1f0d}', '\u{399}', '\u{0}']), - ('\u{1f86}', ['\u{1f0e}', '\u{399}', '\u{0}']), - ('\u{1f87}', ['\u{1f0f}', '\u{399}', '\u{0}']), - ('\u{1f88}', ['\u{1f08}', '\u{399}', '\u{0}']), - ('\u{1f89}', ['\u{1f09}', '\u{399}', '\u{0}']), - ('\u{1f8a}', ['\u{1f0a}', '\u{399}', '\u{0}']), - ('\u{1f8b}', ['\u{1f0b}', '\u{399}', '\u{0}']), - ('\u{1f8c}', ['\u{1f0c}', '\u{399}', '\u{0}']), - ('\u{1f8d}', ['\u{1f0d}', '\u{399}', '\u{0}']), - ('\u{1f8e}', ['\u{1f0e}', '\u{399}', '\u{0}']), - ('\u{1f8f}', ['\u{1f0f}', '\u{399}', '\u{0}']), - ('\u{1f90}', ['\u{1f28}', '\u{399}', '\u{0}']), - ('\u{1f91}', ['\u{1f29}', '\u{399}', '\u{0}']), - ('\u{1f92}', ['\u{1f2a}', '\u{399}', '\u{0}']), - ('\u{1f93}', ['\u{1f2b}', '\u{399}', '\u{0}']), - ('\u{1f94}', ['\u{1f2c}', '\u{399}', '\u{0}']), - ('\u{1f95}', ['\u{1f2d}', '\u{399}', '\u{0}']), - ('\u{1f96}', ['\u{1f2e}', '\u{399}', '\u{0}']), - ('\u{1f97}', ['\u{1f2f}', '\u{399}', '\u{0}']), - ('\u{1f98}', ['\u{1f28}', '\u{399}', '\u{0}']), - ('\u{1f99}', ['\u{1f29}', '\u{399}', '\u{0}']), - ('\u{1f9a}', ['\u{1f2a}', '\u{399}', '\u{0}']), - ('\u{1f9b}', ['\u{1f2b}', '\u{399}', '\u{0}']), - ('\u{1f9c}', ['\u{1f2c}', '\u{399}', '\u{0}']), - ('\u{1f9d}', ['\u{1f2d}', '\u{399}', '\u{0}']), - ('\u{1f9e}', ['\u{1f2e}', '\u{399}', '\u{0}']), - ('\u{1f9f}', ['\u{1f2f}', '\u{399}', '\u{0}']), - ('\u{1fa0}', ['\u{1f68}', '\u{399}', '\u{0}']), - ('\u{1fa1}', ['\u{1f69}', '\u{399}', '\u{0}']), - ('\u{1fa2}', ['\u{1f6a}', '\u{399}', '\u{0}']), - ('\u{1fa3}', ['\u{1f6b}', '\u{399}', '\u{0}']), - ('\u{1fa4}', ['\u{1f6c}', '\u{399}', '\u{0}']), - ('\u{1fa5}', ['\u{1f6d}', '\u{399}', '\u{0}']), - ('\u{1fa6}', ['\u{1f6e}', '\u{399}', '\u{0}']), - ('\u{1fa7}', ['\u{1f6f}', '\u{399}', '\u{0}']), - ('\u{1fa8}', ['\u{1f68}', '\u{399}', '\u{0}']), - ('\u{1fa9}', ['\u{1f69}', '\u{399}', '\u{0}']), - ('\u{1faa}', ['\u{1f6a}', '\u{399}', '\u{0}']), - ('\u{1fab}', ['\u{1f6b}', '\u{399}', '\u{0}']), - ('\u{1fac}', ['\u{1f6c}', '\u{399}', '\u{0}']), - ('\u{1fad}', ['\u{1f6d}', '\u{399}', '\u{0}']), - ('\u{1fae}', ['\u{1f6e}', '\u{399}', '\u{0}']), - ('\u{1faf}', ['\u{1f6f}', '\u{399}', '\u{0}']), - ('\u{1fb0}', ['\u{1fb8}', '\u{0}', '\u{0}']), ('\u{1fb1}', ['\u{1fb9}', '\u{0}', '\u{0}']), - ('\u{1fb2}', ['\u{1fba}', '\u{399}', '\u{0}']), - ('\u{1fb3}', ['\u{391}', '\u{399}', '\u{0}']), - ('\u{1fb4}', ['\u{386}', '\u{399}', '\u{0}']), - ('\u{1fb6}', ['\u{391}', '\u{342}', '\u{0}']), - ('\u{1fb7}', ['\u{391}', '\u{342}', '\u{399}']), - ('\u{1fbc}', ['\u{391}', '\u{399}', '\u{0}']), ('\u{1fbe}', ['\u{399}', '\u{0}', '\u{0}']), - ('\u{1fc2}', ['\u{1fca}', '\u{399}', '\u{0}']), - ('\u{1fc3}', ['\u{397}', '\u{399}', '\u{0}']), - ('\u{1fc4}', ['\u{389}', '\u{399}', '\u{0}']), - ('\u{1fc6}', ['\u{397}', '\u{342}', '\u{0}']), - ('\u{1fc7}', ['\u{397}', '\u{342}', '\u{399}']), - ('\u{1fcc}', ['\u{397}', '\u{399}', '\u{0}']), ('\u{1fd0}', ['\u{1fd8}', '\u{0}', '\u{0}']), - ('\u{1fd1}', ['\u{1fd9}', '\u{0}', '\u{0}']), - 
('\u{1fd2}', ['\u{399}', '\u{308}', '\u{300}']), - ('\u{1fd3}', ['\u{399}', '\u{308}', '\u{301}']), - ('\u{1fd6}', ['\u{399}', '\u{342}', '\u{0}']), - ('\u{1fd7}', ['\u{399}', '\u{308}', '\u{342}']), - ('\u{1fe0}', ['\u{1fe8}', '\u{0}', '\u{0}']), ('\u{1fe1}', ['\u{1fe9}', '\u{0}', '\u{0}']), - ('\u{1fe2}', ['\u{3a5}', '\u{308}', '\u{300}']), - ('\u{1fe3}', ['\u{3a5}', '\u{308}', '\u{301}']), - ('\u{1fe4}', ['\u{3a1}', '\u{313}', '\u{0}']), ('\u{1fe5}', ['\u{1fec}', '\u{0}', '\u{0}']), - ('\u{1fe6}', ['\u{3a5}', '\u{342}', '\u{0}']), - ('\u{1fe7}', ['\u{3a5}', '\u{308}', '\u{342}']), - ('\u{1ff2}', ['\u{1ffa}', '\u{399}', '\u{0}']), - ('\u{1ff3}', ['\u{3a9}', '\u{399}', '\u{0}']), - ('\u{1ff4}', ['\u{38f}', '\u{399}', '\u{0}']), - ('\u{1ff6}', ['\u{3a9}', '\u{342}', '\u{0}']), - ('\u{1ff7}', ['\u{3a9}', '\u{342}', '\u{399}']), - ('\u{1ffc}', ['\u{3a9}', '\u{399}', '\u{0}']), ('\u{214e}', ['\u{2132}', '\u{0}', '\u{0}']), - ('\u{2170}', ['\u{2160}', '\u{0}', '\u{0}']), ('\u{2171}', ['\u{2161}', '\u{0}', '\u{0}']), - ('\u{2172}', ['\u{2162}', '\u{0}', '\u{0}']), ('\u{2173}', ['\u{2163}', '\u{0}', '\u{0}']), - ('\u{2174}', ['\u{2164}', '\u{0}', '\u{0}']), ('\u{2175}', ['\u{2165}', '\u{0}', '\u{0}']), - ('\u{2176}', ['\u{2166}', '\u{0}', '\u{0}']), ('\u{2177}', ['\u{2167}', '\u{0}', '\u{0}']), - ('\u{2178}', ['\u{2168}', '\u{0}', '\u{0}']), ('\u{2179}', ['\u{2169}', '\u{0}', '\u{0}']), - ('\u{217a}', ['\u{216a}', '\u{0}', '\u{0}']), ('\u{217b}', ['\u{216b}', '\u{0}', '\u{0}']), - ('\u{217c}', ['\u{216c}', '\u{0}', '\u{0}']), ('\u{217d}', ['\u{216d}', '\u{0}', '\u{0}']), - ('\u{217e}', ['\u{216e}', '\u{0}', '\u{0}']), ('\u{217f}', ['\u{216f}', '\u{0}', '\u{0}']), - ('\u{2184}', ['\u{2183}', '\u{0}', '\u{0}']), ('\u{24d0}', ['\u{24b6}', '\u{0}', '\u{0}']), - ('\u{24d1}', ['\u{24b7}', '\u{0}', '\u{0}']), ('\u{24d2}', ['\u{24b8}', '\u{0}', '\u{0}']), - ('\u{24d3}', ['\u{24b9}', '\u{0}', '\u{0}']), ('\u{24d4}', ['\u{24ba}', '\u{0}', '\u{0}']), - ('\u{24d5}', ['\u{24bb}', '\u{0}', '\u{0}']), ('\u{24d6}', ['\u{24bc}', '\u{0}', '\u{0}']), - ('\u{24d7}', ['\u{24bd}', '\u{0}', '\u{0}']), ('\u{24d8}', ['\u{24be}', '\u{0}', '\u{0}']), - ('\u{24d9}', ['\u{24bf}', '\u{0}', '\u{0}']), ('\u{24da}', ['\u{24c0}', '\u{0}', '\u{0}']), - ('\u{24db}', ['\u{24c1}', '\u{0}', '\u{0}']), ('\u{24dc}', ['\u{24c2}', '\u{0}', '\u{0}']), - ('\u{24dd}', ['\u{24c3}', '\u{0}', '\u{0}']), ('\u{24de}', ['\u{24c4}', '\u{0}', '\u{0}']), - ('\u{24df}', ['\u{24c5}', '\u{0}', '\u{0}']), ('\u{24e0}', ['\u{24c6}', '\u{0}', '\u{0}']), - ('\u{24e1}', ['\u{24c7}', '\u{0}', '\u{0}']), ('\u{24e2}', ['\u{24c8}', '\u{0}', '\u{0}']), - ('\u{24e3}', ['\u{24c9}', '\u{0}', '\u{0}']), ('\u{24e4}', ['\u{24ca}', '\u{0}', '\u{0}']), - ('\u{24e5}', ['\u{24cb}', '\u{0}', '\u{0}']), ('\u{24e6}', ['\u{24cc}', '\u{0}', '\u{0}']), - ('\u{24e7}', ['\u{24cd}', '\u{0}', '\u{0}']), ('\u{24e8}', ['\u{24ce}', '\u{0}', '\u{0}']), - ('\u{24e9}', ['\u{24cf}', '\u{0}', '\u{0}']), ('\u{2c30}', ['\u{2c00}', '\u{0}', '\u{0}']), - ('\u{2c31}', ['\u{2c01}', '\u{0}', '\u{0}']), ('\u{2c32}', ['\u{2c02}', '\u{0}', '\u{0}']), - ('\u{2c33}', ['\u{2c03}', '\u{0}', '\u{0}']), ('\u{2c34}', ['\u{2c04}', '\u{0}', '\u{0}']), - ('\u{2c35}', ['\u{2c05}', '\u{0}', '\u{0}']), ('\u{2c36}', ['\u{2c06}', '\u{0}', '\u{0}']), - ('\u{2c37}', ['\u{2c07}', '\u{0}', '\u{0}']), ('\u{2c38}', ['\u{2c08}', '\u{0}', '\u{0}']), - ('\u{2c39}', ['\u{2c09}', '\u{0}', '\u{0}']), ('\u{2c3a}', ['\u{2c0a}', '\u{0}', '\u{0}']), - ('\u{2c3b}', ['\u{2c0b}', '\u{0}', '\u{0}']), ('\u{2c3c}', ['\u{2c0c}', '\u{0}', 
'\u{0}']), - ('\u{2c3d}', ['\u{2c0d}', '\u{0}', '\u{0}']), ('\u{2c3e}', ['\u{2c0e}', '\u{0}', '\u{0}']), - ('\u{2c3f}', ['\u{2c0f}', '\u{0}', '\u{0}']), ('\u{2c40}', ['\u{2c10}', '\u{0}', '\u{0}']), - ('\u{2c41}', ['\u{2c11}', '\u{0}', '\u{0}']), ('\u{2c42}', ['\u{2c12}', '\u{0}', '\u{0}']), - ('\u{2c43}', ['\u{2c13}', '\u{0}', '\u{0}']), ('\u{2c44}', ['\u{2c14}', '\u{0}', '\u{0}']), - ('\u{2c45}', ['\u{2c15}', '\u{0}', '\u{0}']), ('\u{2c46}', ['\u{2c16}', '\u{0}', '\u{0}']), - ('\u{2c47}', ['\u{2c17}', '\u{0}', '\u{0}']), ('\u{2c48}', ['\u{2c18}', '\u{0}', '\u{0}']), - ('\u{2c49}', ['\u{2c19}', '\u{0}', '\u{0}']), ('\u{2c4a}', ['\u{2c1a}', '\u{0}', '\u{0}']), - ('\u{2c4b}', ['\u{2c1b}', '\u{0}', '\u{0}']), ('\u{2c4c}', ['\u{2c1c}', '\u{0}', '\u{0}']), - ('\u{2c4d}', ['\u{2c1d}', '\u{0}', '\u{0}']), ('\u{2c4e}', ['\u{2c1e}', '\u{0}', '\u{0}']), - ('\u{2c4f}', ['\u{2c1f}', '\u{0}', '\u{0}']), ('\u{2c50}', ['\u{2c20}', '\u{0}', '\u{0}']), - ('\u{2c51}', ['\u{2c21}', '\u{0}', '\u{0}']), ('\u{2c52}', ['\u{2c22}', '\u{0}', '\u{0}']), - ('\u{2c53}', ['\u{2c23}', '\u{0}', '\u{0}']), ('\u{2c54}', ['\u{2c24}', '\u{0}', '\u{0}']), - ('\u{2c55}', ['\u{2c25}', '\u{0}', '\u{0}']), ('\u{2c56}', ['\u{2c26}', '\u{0}', '\u{0}']), - ('\u{2c57}', ['\u{2c27}', '\u{0}', '\u{0}']), ('\u{2c58}', ['\u{2c28}', '\u{0}', '\u{0}']), - ('\u{2c59}', ['\u{2c29}', '\u{0}', '\u{0}']), ('\u{2c5a}', ['\u{2c2a}', '\u{0}', '\u{0}']), - ('\u{2c5b}', ['\u{2c2b}', '\u{0}', '\u{0}']), ('\u{2c5c}', ['\u{2c2c}', '\u{0}', '\u{0}']), - ('\u{2c5d}', ['\u{2c2d}', '\u{0}', '\u{0}']), ('\u{2c5e}', ['\u{2c2e}', '\u{0}', '\u{0}']), - ('\u{2c5f}', ['\u{2c2f}', '\u{0}', '\u{0}']), ('\u{2c61}', ['\u{2c60}', '\u{0}', '\u{0}']), - ('\u{2c65}', ['\u{23a}', '\u{0}', '\u{0}']), ('\u{2c66}', ['\u{23e}', '\u{0}', '\u{0}']), - ('\u{2c68}', ['\u{2c67}', '\u{0}', '\u{0}']), ('\u{2c6a}', ['\u{2c69}', '\u{0}', '\u{0}']), - ('\u{2c6c}', ['\u{2c6b}', '\u{0}', '\u{0}']), ('\u{2c73}', ['\u{2c72}', '\u{0}', '\u{0}']), - ('\u{2c76}', ['\u{2c75}', '\u{0}', '\u{0}']), ('\u{2c81}', ['\u{2c80}', '\u{0}', '\u{0}']), - ('\u{2c83}', ['\u{2c82}', '\u{0}', '\u{0}']), ('\u{2c85}', ['\u{2c84}', '\u{0}', '\u{0}']), - ('\u{2c87}', ['\u{2c86}', '\u{0}', '\u{0}']), ('\u{2c89}', ['\u{2c88}', '\u{0}', '\u{0}']), - ('\u{2c8b}', ['\u{2c8a}', '\u{0}', '\u{0}']), ('\u{2c8d}', ['\u{2c8c}', '\u{0}', '\u{0}']), - ('\u{2c8f}', ['\u{2c8e}', '\u{0}', '\u{0}']), ('\u{2c91}', ['\u{2c90}', '\u{0}', '\u{0}']), - ('\u{2c93}', ['\u{2c92}', '\u{0}', '\u{0}']), ('\u{2c95}', ['\u{2c94}', '\u{0}', '\u{0}']), - ('\u{2c97}', ['\u{2c96}', '\u{0}', '\u{0}']), ('\u{2c99}', ['\u{2c98}', '\u{0}', '\u{0}']), - ('\u{2c9b}', ['\u{2c9a}', '\u{0}', '\u{0}']), ('\u{2c9d}', ['\u{2c9c}', '\u{0}', '\u{0}']), - ('\u{2c9f}', ['\u{2c9e}', '\u{0}', '\u{0}']), ('\u{2ca1}', ['\u{2ca0}', '\u{0}', '\u{0}']), - ('\u{2ca3}', ['\u{2ca2}', '\u{0}', '\u{0}']), ('\u{2ca5}', ['\u{2ca4}', '\u{0}', '\u{0}']), - ('\u{2ca7}', ['\u{2ca6}', '\u{0}', '\u{0}']), ('\u{2ca9}', ['\u{2ca8}', '\u{0}', '\u{0}']), - ('\u{2cab}', ['\u{2caa}', '\u{0}', '\u{0}']), ('\u{2cad}', ['\u{2cac}', '\u{0}', '\u{0}']), - ('\u{2caf}', ['\u{2cae}', '\u{0}', '\u{0}']), ('\u{2cb1}', ['\u{2cb0}', '\u{0}', '\u{0}']), - ('\u{2cb3}', ['\u{2cb2}', '\u{0}', '\u{0}']), ('\u{2cb5}', ['\u{2cb4}', '\u{0}', '\u{0}']), - ('\u{2cb7}', ['\u{2cb6}', '\u{0}', '\u{0}']), ('\u{2cb9}', ['\u{2cb8}', '\u{0}', '\u{0}']), - ('\u{2cbb}', ['\u{2cba}', '\u{0}', '\u{0}']), ('\u{2cbd}', ['\u{2cbc}', '\u{0}', '\u{0}']), - ('\u{2cbf}', ['\u{2cbe}', '\u{0}', '\u{0}']), ('\u{2cc1}', 
['\u{2cc0}', '\u{0}', '\u{0}']), - ('\u{2cc3}', ['\u{2cc2}', '\u{0}', '\u{0}']), ('\u{2cc5}', ['\u{2cc4}', '\u{0}', '\u{0}']), - ('\u{2cc7}', ['\u{2cc6}', '\u{0}', '\u{0}']), ('\u{2cc9}', ['\u{2cc8}', '\u{0}', '\u{0}']), - ('\u{2ccb}', ['\u{2cca}', '\u{0}', '\u{0}']), ('\u{2ccd}', ['\u{2ccc}', '\u{0}', '\u{0}']), - ('\u{2ccf}', ['\u{2cce}', '\u{0}', '\u{0}']), ('\u{2cd1}', ['\u{2cd0}', '\u{0}', '\u{0}']), - ('\u{2cd3}', ['\u{2cd2}', '\u{0}', '\u{0}']), ('\u{2cd5}', ['\u{2cd4}', '\u{0}', '\u{0}']), - ('\u{2cd7}', ['\u{2cd6}', '\u{0}', '\u{0}']), ('\u{2cd9}', ['\u{2cd8}', '\u{0}', '\u{0}']), - ('\u{2cdb}', ['\u{2cda}', '\u{0}', '\u{0}']), ('\u{2cdd}', ['\u{2cdc}', '\u{0}', '\u{0}']), - ('\u{2cdf}', ['\u{2cde}', '\u{0}', '\u{0}']), ('\u{2ce1}', ['\u{2ce0}', '\u{0}', '\u{0}']), - ('\u{2ce3}', ['\u{2ce2}', '\u{0}', '\u{0}']), ('\u{2cec}', ['\u{2ceb}', '\u{0}', '\u{0}']), - ('\u{2cee}', ['\u{2ced}', '\u{0}', '\u{0}']), ('\u{2cf3}', ['\u{2cf2}', '\u{0}', '\u{0}']), - ('\u{2d00}', ['\u{10a0}', '\u{0}', '\u{0}']), ('\u{2d01}', ['\u{10a1}', '\u{0}', '\u{0}']), - ('\u{2d02}', ['\u{10a2}', '\u{0}', '\u{0}']), ('\u{2d03}', ['\u{10a3}', '\u{0}', '\u{0}']), - ('\u{2d04}', ['\u{10a4}', '\u{0}', '\u{0}']), ('\u{2d05}', ['\u{10a5}', '\u{0}', '\u{0}']), - ('\u{2d06}', ['\u{10a6}', '\u{0}', '\u{0}']), ('\u{2d07}', ['\u{10a7}', '\u{0}', '\u{0}']), - ('\u{2d08}', ['\u{10a8}', '\u{0}', '\u{0}']), ('\u{2d09}', ['\u{10a9}', '\u{0}', '\u{0}']), - ('\u{2d0a}', ['\u{10aa}', '\u{0}', '\u{0}']), ('\u{2d0b}', ['\u{10ab}', '\u{0}', '\u{0}']), - ('\u{2d0c}', ['\u{10ac}', '\u{0}', '\u{0}']), ('\u{2d0d}', ['\u{10ad}', '\u{0}', '\u{0}']), - ('\u{2d0e}', ['\u{10ae}', '\u{0}', '\u{0}']), ('\u{2d0f}', ['\u{10af}', '\u{0}', '\u{0}']), - ('\u{2d10}', ['\u{10b0}', '\u{0}', '\u{0}']), ('\u{2d11}', ['\u{10b1}', '\u{0}', '\u{0}']), - ('\u{2d12}', ['\u{10b2}', '\u{0}', '\u{0}']), ('\u{2d13}', ['\u{10b3}', '\u{0}', '\u{0}']), - ('\u{2d14}', ['\u{10b4}', '\u{0}', '\u{0}']), ('\u{2d15}', ['\u{10b5}', '\u{0}', '\u{0}']), - ('\u{2d16}', ['\u{10b6}', '\u{0}', '\u{0}']), ('\u{2d17}', ['\u{10b7}', '\u{0}', '\u{0}']), - ('\u{2d18}', ['\u{10b8}', '\u{0}', '\u{0}']), ('\u{2d19}', ['\u{10b9}', '\u{0}', '\u{0}']), - ('\u{2d1a}', ['\u{10ba}', '\u{0}', '\u{0}']), ('\u{2d1b}', ['\u{10bb}', '\u{0}', '\u{0}']), - ('\u{2d1c}', ['\u{10bc}', '\u{0}', '\u{0}']), ('\u{2d1d}', ['\u{10bd}', '\u{0}', '\u{0}']), - ('\u{2d1e}', ['\u{10be}', '\u{0}', '\u{0}']), ('\u{2d1f}', ['\u{10bf}', '\u{0}', '\u{0}']), - ('\u{2d20}', ['\u{10c0}', '\u{0}', '\u{0}']), ('\u{2d21}', ['\u{10c1}', '\u{0}', '\u{0}']), - ('\u{2d22}', ['\u{10c2}', '\u{0}', '\u{0}']), ('\u{2d23}', ['\u{10c3}', '\u{0}', '\u{0}']), - ('\u{2d24}', ['\u{10c4}', '\u{0}', '\u{0}']), ('\u{2d25}', ['\u{10c5}', '\u{0}', '\u{0}']), - ('\u{2d27}', ['\u{10c7}', '\u{0}', '\u{0}']), ('\u{2d2d}', ['\u{10cd}', '\u{0}', '\u{0}']), - ('\u{a641}', ['\u{a640}', '\u{0}', '\u{0}']), ('\u{a643}', ['\u{a642}', '\u{0}', '\u{0}']), - ('\u{a645}', ['\u{a644}', '\u{0}', '\u{0}']), ('\u{a647}', ['\u{a646}', '\u{0}', '\u{0}']), - ('\u{a649}', ['\u{a648}', '\u{0}', '\u{0}']), ('\u{a64b}', ['\u{a64a}', '\u{0}', '\u{0}']), - ('\u{a64d}', ['\u{a64c}', '\u{0}', '\u{0}']), ('\u{a64f}', ['\u{a64e}', '\u{0}', '\u{0}']), - ('\u{a651}', ['\u{a650}', '\u{0}', '\u{0}']), ('\u{a653}', ['\u{a652}', '\u{0}', '\u{0}']), - ('\u{a655}', ['\u{a654}', '\u{0}', '\u{0}']), ('\u{a657}', ['\u{a656}', '\u{0}', '\u{0}']), - ('\u{a659}', ['\u{a658}', '\u{0}', '\u{0}']), ('\u{a65b}', ['\u{a65a}', '\u{0}', '\u{0}']), - ('\u{a65d}', ['\u{a65c}', '\u{0}', 
'\u{0}']), ('\u{a65f}', ['\u{a65e}', '\u{0}', '\u{0}']), - ('\u{a661}', ['\u{a660}', '\u{0}', '\u{0}']), ('\u{a663}', ['\u{a662}', '\u{0}', '\u{0}']), - ('\u{a665}', ['\u{a664}', '\u{0}', '\u{0}']), ('\u{a667}', ['\u{a666}', '\u{0}', '\u{0}']), - ('\u{a669}', ['\u{a668}', '\u{0}', '\u{0}']), ('\u{a66b}', ['\u{a66a}', '\u{0}', '\u{0}']), - ('\u{a66d}', ['\u{a66c}', '\u{0}', '\u{0}']), ('\u{a681}', ['\u{a680}', '\u{0}', '\u{0}']), - ('\u{a683}', ['\u{a682}', '\u{0}', '\u{0}']), ('\u{a685}', ['\u{a684}', '\u{0}', '\u{0}']), - ('\u{a687}', ['\u{a686}', '\u{0}', '\u{0}']), ('\u{a689}', ['\u{a688}', '\u{0}', '\u{0}']), - ('\u{a68b}', ['\u{a68a}', '\u{0}', '\u{0}']), ('\u{a68d}', ['\u{a68c}', '\u{0}', '\u{0}']), - ('\u{a68f}', ['\u{a68e}', '\u{0}', '\u{0}']), ('\u{a691}', ['\u{a690}', '\u{0}', '\u{0}']), - ('\u{a693}', ['\u{a692}', '\u{0}', '\u{0}']), ('\u{a695}', ['\u{a694}', '\u{0}', '\u{0}']), - ('\u{a697}', ['\u{a696}', '\u{0}', '\u{0}']), ('\u{a699}', ['\u{a698}', '\u{0}', '\u{0}']), - ('\u{a69b}', ['\u{a69a}', '\u{0}', '\u{0}']), ('\u{a723}', ['\u{a722}', '\u{0}', '\u{0}']), - ('\u{a725}', ['\u{a724}', '\u{0}', '\u{0}']), ('\u{a727}', ['\u{a726}', '\u{0}', '\u{0}']), - ('\u{a729}', ['\u{a728}', '\u{0}', '\u{0}']), ('\u{a72b}', ['\u{a72a}', '\u{0}', '\u{0}']), - ('\u{a72d}', ['\u{a72c}', '\u{0}', '\u{0}']), ('\u{a72f}', ['\u{a72e}', '\u{0}', '\u{0}']), - ('\u{a733}', ['\u{a732}', '\u{0}', '\u{0}']), ('\u{a735}', ['\u{a734}', '\u{0}', '\u{0}']), - ('\u{a737}', ['\u{a736}', '\u{0}', '\u{0}']), ('\u{a739}', ['\u{a738}', '\u{0}', '\u{0}']), - ('\u{a73b}', ['\u{a73a}', '\u{0}', '\u{0}']), ('\u{a73d}', ['\u{a73c}', '\u{0}', '\u{0}']), - ('\u{a73f}', ['\u{a73e}', '\u{0}', '\u{0}']), ('\u{a741}', ['\u{a740}', '\u{0}', '\u{0}']), - ('\u{a743}', ['\u{a742}', '\u{0}', '\u{0}']), ('\u{a745}', ['\u{a744}', '\u{0}', '\u{0}']), - ('\u{a747}', ['\u{a746}', '\u{0}', '\u{0}']), ('\u{a749}', ['\u{a748}', '\u{0}', '\u{0}']), - ('\u{a74b}', ['\u{a74a}', '\u{0}', '\u{0}']), ('\u{a74d}', ['\u{a74c}', '\u{0}', '\u{0}']), - ('\u{a74f}', ['\u{a74e}', '\u{0}', '\u{0}']), ('\u{a751}', ['\u{a750}', '\u{0}', '\u{0}']), - ('\u{a753}', ['\u{a752}', '\u{0}', '\u{0}']), ('\u{a755}', ['\u{a754}', '\u{0}', '\u{0}']), - ('\u{a757}', ['\u{a756}', '\u{0}', '\u{0}']), ('\u{a759}', ['\u{a758}', '\u{0}', '\u{0}']), - ('\u{a75b}', ['\u{a75a}', '\u{0}', '\u{0}']), ('\u{a75d}', ['\u{a75c}', '\u{0}', '\u{0}']), - ('\u{a75f}', ['\u{a75e}', '\u{0}', '\u{0}']), ('\u{a761}', ['\u{a760}', '\u{0}', '\u{0}']), - ('\u{a763}', ['\u{a762}', '\u{0}', '\u{0}']), ('\u{a765}', ['\u{a764}', '\u{0}', '\u{0}']), - ('\u{a767}', ['\u{a766}', '\u{0}', '\u{0}']), ('\u{a769}', ['\u{a768}', '\u{0}', '\u{0}']), - ('\u{a76b}', ['\u{a76a}', '\u{0}', '\u{0}']), ('\u{a76d}', ['\u{a76c}', '\u{0}', '\u{0}']), - ('\u{a76f}', ['\u{a76e}', '\u{0}', '\u{0}']), ('\u{a77a}', ['\u{a779}', '\u{0}', '\u{0}']), - ('\u{a77c}', ['\u{a77b}', '\u{0}', '\u{0}']), ('\u{a77f}', ['\u{a77e}', '\u{0}', '\u{0}']), - ('\u{a781}', ['\u{a780}', '\u{0}', '\u{0}']), ('\u{a783}', ['\u{a782}', '\u{0}', '\u{0}']), - ('\u{a785}', ['\u{a784}', '\u{0}', '\u{0}']), ('\u{a787}', ['\u{a786}', '\u{0}', '\u{0}']), - ('\u{a78c}', ['\u{a78b}', '\u{0}', '\u{0}']), ('\u{a791}', ['\u{a790}', '\u{0}', '\u{0}']), - ('\u{a793}', ['\u{a792}', '\u{0}', '\u{0}']), ('\u{a794}', ['\u{a7c4}', '\u{0}', '\u{0}']), - ('\u{a797}', ['\u{a796}', '\u{0}', '\u{0}']), ('\u{a799}', ['\u{a798}', '\u{0}', '\u{0}']), - ('\u{a79b}', ['\u{a79a}', '\u{0}', '\u{0}']), ('\u{a79d}', ['\u{a79c}', '\u{0}', '\u{0}']), - ('\u{a79f}', 
['\u{a79e}', '\u{0}', '\u{0}']), ('\u{a7a1}', ['\u{a7a0}', '\u{0}', '\u{0}']), - ('\u{a7a3}', ['\u{a7a2}', '\u{0}', '\u{0}']), ('\u{a7a5}', ['\u{a7a4}', '\u{0}', '\u{0}']), - ('\u{a7a7}', ['\u{a7a6}', '\u{0}', '\u{0}']), ('\u{a7a9}', ['\u{a7a8}', '\u{0}', '\u{0}']), - ('\u{a7b5}', ['\u{a7b4}', '\u{0}', '\u{0}']), ('\u{a7b7}', ['\u{a7b6}', '\u{0}', '\u{0}']), - ('\u{a7b9}', ['\u{a7b8}', '\u{0}', '\u{0}']), ('\u{a7bb}', ['\u{a7ba}', '\u{0}', '\u{0}']), - ('\u{a7bd}', ['\u{a7bc}', '\u{0}', '\u{0}']), ('\u{a7bf}', ['\u{a7be}', '\u{0}', '\u{0}']), - ('\u{a7c1}', ['\u{a7c0}', '\u{0}', '\u{0}']), ('\u{a7c3}', ['\u{a7c2}', '\u{0}', '\u{0}']), - ('\u{a7c8}', ['\u{a7c7}', '\u{0}', '\u{0}']), ('\u{a7ca}', ['\u{a7c9}', '\u{0}', '\u{0}']), - ('\u{a7cd}', ['\u{a7cc}', '\u{0}', '\u{0}']), ('\u{a7cf}', ['\u{a7ce}', '\u{0}', '\u{0}']), - ('\u{a7d1}', ['\u{a7d0}', '\u{0}', '\u{0}']), ('\u{a7d3}', ['\u{a7d2}', '\u{0}', '\u{0}']), - ('\u{a7d5}', ['\u{a7d4}', '\u{0}', '\u{0}']), ('\u{a7d7}', ['\u{a7d6}', '\u{0}', '\u{0}']), - ('\u{a7d9}', ['\u{a7d8}', '\u{0}', '\u{0}']), ('\u{a7db}', ['\u{a7da}', '\u{0}', '\u{0}']), - ('\u{a7f6}', ['\u{a7f5}', '\u{0}', '\u{0}']), ('\u{ab53}', ['\u{a7b3}', '\u{0}', '\u{0}']), - ('\u{ab70}', ['\u{13a0}', '\u{0}', '\u{0}']), ('\u{ab71}', ['\u{13a1}', '\u{0}', '\u{0}']), - ('\u{ab72}', ['\u{13a2}', '\u{0}', '\u{0}']), ('\u{ab73}', ['\u{13a3}', '\u{0}', '\u{0}']), - ('\u{ab74}', ['\u{13a4}', '\u{0}', '\u{0}']), ('\u{ab75}', ['\u{13a5}', '\u{0}', '\u{0}']), - ('\u{ab76}', ['\u{13a6}', '\u{0}', '\u{0}']), ('\u{ab77}', ['\u{13a7}', '\u{0}', '\u{0}']), - ('\u{ab78}', ['\u{13a8}', '\u{0}', '\u{0}']), ('\u{ab79}', ['\u{13a9}', '\u{0}', '\u{0}']), - ('\u{ab7a}', ['\u{13aa}', '\u{0}', '\u{0}']), ('\u{ab7b}', ['\u{13ab}', '\u{0}', '\u{0}']), - ('\u{ab7c}', ['\u{13ac}', '\u{0}', '\u{0}']), ('\u{ab7d}', ['\u{13ad}', '\u{0}', '\u{0}']), - ('\u{ab7e}', ['\u{13ae}', '\u{0}', '\u{0}']), ('\u{ab7f}', ['\u{13af}', '\u{0}', '\u{0}']), - ('\u{ab80}', ['\u{13b0}', '\u{0}', '\u{0}']), ('\u{ab81}', ['\u{13b1}', '\u{0}', '\u{0}']), - ('\u{ab82}', ['\u{13b2}', '\u{0}', '\u{0}']), ('\u{ab83}', ['\u{13b3}', '\u{0}', '\u{0}']), - ('\u{ab84}', ['\u{13b4}', '\u{0}', '\u{0}']), ('\u{ab85}', ['\u{13b5}', '\u{0}', '\u{0}']), - ('\u{ab86}', ['\u{13b6}', '\u{0}', '\u{0}']), ('\u{ab87}', ['\u{13b7}', '\u{0}', '\u{0}']), - ('\u{ab88}', ['\u{13b8}', '\u{0}', '\u{0}']), ('\u{ab89}', ['\u{13b9}', '\u{0}', '\u{0}']), - ('\u{ab8a}', ['\u{13ba}', '\u{0}', '\u{0}']), ('\u{ab8b}', ['\u{13bb}', '\u{0}', '\u{0}']), - ('\u{ab8c}', ['\u{13bc}', '\u{0}', '\u{0}']), ('\u{ab8d}', ['\u{13bd}', '\u{0}', '\u{0}']), - ('\u{ab8e}', ['\u{13be}', '\u{0}', '\u{0}']), ('\u{ab8f}', ['\u{13bf}', '\u{0}', '\u{0}']), - ('\u{ab90}', ['\u{13c0}', '\u{0}', '\u{0}']), ('\u{ab91}', ['\u{13c1}', '\u{0}', '\u{0}']), - ('\u{ab92}', ['\u{13c2}', '\u{0}', '\u{0}']), ('\u{ab93}', ['\u{13c3}', '\u{0}', '\u{0}']), - ('\u{ab94}', ['\u{13c4}', '\u{0}', '\u{0}']), ('\u{ab95}', ['\u{13c5}', '\u{0}', '\u{0}']), - ('\u{ab96}', ['\u{13c6}', '\u{0}', '\u{0}']), ('\u{ab97}', ['\u{13c7}', '\u{0}', '\u{0}']), - ('\u{ab98}', ['\u{13c8}', '\u{0}', '\u{0}']), ('\u{ab99}', ['\u{13c9}', '\u{0}', '\u{0}']), - ('\u{ab9a}', ['\u{13ca}', '\u{0}', '\u{0}']), ('\u{ab9b}', ['\u{13cb}', '\u{0}', '\u{0}']), - ('\u{ab9c}', ['\u{13cc}', '\u{0}', '\u{0}']), ('\u{ab9d}', ['\u{13cd}', '\u{0}', '\u{0}']), - ('\u{ab9e}', ['\u{13ce}', '\u{0}', '\u{0}']), ('\u{ab9f}', ['\u{13cf}', '\u{0}', '\u{0}']), - ('\u{aba0}', ['\u{13d0}', '\u{0}', '\u{0}']), ('\u{aba1}', ['\u{13d1}', '\u{0}', 
'\u{0}']), - ('\u{aba2}', ['\u{13d2}', '\u{0}', '\u{0}']), ('\u{aba3}', ['\u{13d3}', '\u{0}', '\u{0}']), - ('\u{aba4}', ['\u{13d4}', '\u{0}', '\u{0}']), ('\u{aba5}', ['\u{13d5}', '\u{0}', '\u{0}']), - ('\u{aba6}', ['\u{13d6}', '\u{0}', '\u{0}']), ('\u{aba7}', ['\u{13d7}', '\u{0}', '\u{0}']), - ('\u{aba8}', ['\u{13d8}', '\u{0}', '\u{0}']), ('\u{aba9}', ['\u{13d9}', '\u{0}', '\u{0}']), - ('\u{abaa}', ['\u{13da}', '\u{0}', '\u{0}']), ('\u{abab}', ['\u{13db}', '\u{0}', '\u{0}']), - ('\u{abac}', ['\u{13dc}', '\u{0}', '\u{0}']), ('\u{abad}', ['\u{13dd}', '\u{0}', '\u{0}']), - ('\u{abae}', ['\u{13de}', '\u{0}', '\u{0}']), ('\u{abaf}', ['\u{13df}', '\u{0}', '\u{0}']), - ('\u{abb0}', ['\u{13e0}', '\u{0}', '\u{0}']), ('\u{abb1}', ['\u{13e1}', '\u{0}', '\u{0}']), - ('\u{abb2}', ['\u{13e2}', '\u{0}', '\u{0}']), ('\u{abb3}', ['\u{13e3}', '\u{0}', '\u{0}']), - ('\u{abb4}', ['\u{13e4}', '\u{0}', '\u{0}']), ('\u{abb5}', ['\u{13e5}', '\u{0}', '\u{0}']), - ('\u{abb6}', ['\u{13e6}', '\u{0}', '\u{0}']), ('\u{abb7}', ['\u{13e7}', '\u{0}', '\u{0}']), - ('\u{abb8}', ['\u{13e8}', '\u{0}', '\u{0}']), ('\u{abb9}', ['\u{13e9}', '\u{0}', '\u{0}']), - ('\u{abba}', ['\u{13ea}', '\u{0}', '\u{0}']), ('\u{abbb}', ['\u{13eb}', '\u{0}', '\u{0}']), - ('\u{abbc}', ['\u{13ec}', '\u{0}', '\u{0}']), ('\u{abbd}', ['\u{13ed}', '\u{0}', '\u{0}']), - ('\u{abbe}', ['\u{13ee}', '\u{0}', '\u{0}']), ('\u{abbf}', ['\u{13ef}', '\u{0}', '\u{0}']), - ('\u{fb00}', ['\u{46}', '\u{46}', '\u{0}']), ('\u{fb01}', ['\u{46}', '\u{49}', '\u{0}']), - ('\u{fb02}', ['\u{46}', '\u{4c}', '\u{0}']), ('\u{fb03}', ['\u{46}', '\u{46}', '\u{49}']), - ('\u{fb04}', ['\u{46}', '\u{46}', '\u{4c}']), ('\u{fb05}', ['\u{53}', '\u{54}', '\u{0}']), - ('\u{fb06}', ['\u{53}', '\u{54}', '\u{0}']), ('\u{fb13}', ['\u{544}', '\u{546}', '\u{0}']), - ('\u{fb14}', ['\u{544}', '\u{535}', '\u{0}']), - ('\u{fb15}', ['\u{544}', '\u{53b}', '\u{0}']), - ('\u{fb16}', ['\u{54e}', '\u{546}', '\u{0}']), - ('\u{fb17}', ['\u{544}', '\u{53d}', '\u{0}']), ('\u{ff41}', ['\u{ff21}', '\u{0}', '\u{0}']), - ('\u{ff42}', ['\u{ff22}', '\u{0}', '\u{0}']), ('\u{ff43}', ['\u{ff23}', '\u{0}', '\u{0}']), - ('\u{ff44}', ['\u{ff24}', '\u{0}', '\u{0}']), ('\u{ff45}', ['\u{ff25}', '\u{0}', '\u{0}']), - ('\u{ff46}', ['\u{ff26}', '\u{0}', '\u{0}']), ('\u{ff47}', ['\u{ff27}', '\u{0}', '\u{0}']), - ('\u{ff48}', ['\u{ff28}', '\u{0}', '\u{0}']), ('\u{ff49}', ['\u{ff29}', '\u{0}', '\u{0}']), - ('\u{ff4a}', ['\u{ff2a}', '\u{0}', '\u{0}']), ('\u{ff4b}', ['\u{ff2b}', '\u{0}', '\u{0}']), - ('\u{ff4c}', ['\u{ff2c}', '\u{0}', '\u{0}']), ('\u{ff4d}', ['\u{ff2d}', '\u{0}', '\u{0}']), - ('\u{ff4e}', ['\u{ff2e}', '\u{0}', '\u{0}']), ('\u{ff4f}', ['\u{ff2f}', '\u{0}', '\u{0}']), - ('\u{ff50}', ['\u{ff30}', '\u{0}', '\u{0}']), ('\u{ff51}', ['\u{ff31}', '\u{0}', '\u{0}']), - ('\u{ff52}', ['\u{ff32}', '\u{0}', '\u{0}']), ('\u{ff53}', ['\u{ff33}', '\u{0}', '\u{0}']), - ('\u{ff54}', ['\u{ff34}', '\u{0}', '\u{0}']), ('\u{ff55}', ['\u{ff35}', '\u{0}', '\u{0}']), - ('\u{ff56}', ['\u{ff36}', '\u{0}', '\u{0}']), ('\u{ff57}', ['\u{ff37}', '\u{0}', '\u{0}']), - ('\u{ff58}', ['\u{ff38}', '\u{0}', '\u{0}']), ('\u{ff59}', ['\u{ff39}', '\u{0}', '\u{0}']), - ('\u{ff5a}', ['\u{ff3a}', '\u{0}', '\u{0}']), - ('\u{10428}', ['\u{10400}', '\u{0}', '\u{0}']), - ('\u{10429}', ['\u{10401}', '\u{0}', '\u{0}']), - ('\u{1042a}', ['\u{10402}', '\u{0}', '\u{0}']), - ('\u{1042b}', ['\u{10403}', '\u{0}', '\u{0}']), - ('\u{1042c}', ['\u{10404}', '\u{0}', '\u{0}']), - ('\u{1042d}', ['\u{10405}', '\u{0}', '\u{0}']), - ('\u{1042e}', ['\u{10406}', '\u{0}', 
'\u{0}']), - ('\u{1042f}', ['\u{10407}', '\u{0}', '\u{0}']), - ('\u{10430}', ['\u{10408}', '\u{0}', '\u{0}']), - ('\u{10431}', ['\u{10409}', '\u{0}', '\u{0}']), - ('\u{10432}', ['\u{1040a}', '\u{0}', '\u{0}']), - ('\u{10433}', ['\u{1040b}', '\u{0}', '\u{0}']), - ('\u{10434}', ['\u{1040c}', '\u{0}', '\u{0}']), - ('\u{10435}', ['\u{1040d}', '\u{0}', '\u{0}']), - ('\u{10436}', ['\u{1040e}', '\u{0}', '\u{0}']), - ('\u{10437}', ['\u{1040f}', '\u{0}', '\u{0}']), - ('\u{10438}', ['\u{10410}', '\u{0}', '\u{0}']), - ('\u{10439}', ['\u{10411}', '\u{0}', '\u{0}']), - ('\u{1043a}', ['\u{10412}', '\u{0}', '\u{0}']), - ('\u{1043b}', ['\u{10413}', '\u{0}', '\u{0}']), - ('\u{1043c}', ['\u{10414}', '\u{0}', '\u{0}']), - ('\u{1043d}', ['\u{10415}', '\u{0}', '\u{0}']), - ('\u{1043e}', ['\u{10416}', '\u{0}', '\u{0}']), - ('\u{1043f}', ['\u{10417}', '\u{0}', '\u{0}']), - ('\u{10440}', ['\u{10418}', '\u{0}', '\u{0}']), - ('\u{10441}', ['\u{10419}', '\u{0}', '\u{0}']), - ('\u{10442}', ['\u{1041a}', '\u{0}', '\u{0}']), - ('\u{10443}', ['\u{1041b}', '\u{0}', '\u{0}']), - ('\u{10444}', ['\u{1041c}', '\u{0}', '\u{0}']), - ('\u{10445}', ['\u{1041d}', '\u{0}', '\u{0}']), - ('\u{10446}', ['\u{1041e}', '\u{0}', '\u{0}']), - ('\u{10447}', ['\u{1041f}', '\u{0}', '\u{0}']), - ('\u{10448}', ['\u{10420}', '\u{0}', '\u{0}']), - ('\u{10449}', ['\u{10421}', '\u{0}', '\u{0}']), - ('\u{1044a}', ['\u{10422}', '\u{0}', '\u{0}']), - ('\u{1044b}', ['\u{10423}', '\u{0}', '\u{0}']), - ('\u{1044c}', ['\u{10424}', '\u{0}', '\u{0}']), - ('\u{1044d}', ['\u{10425}', '\u{0}', '\u{0}']), - ('\u{1044e}', ['\u{10426}', '\u{0}', '\u{0}']), - ('\u{1044f}', ['\u{10427}', '\u{0}', '\u{0}']), - ('\u{104d8}', ['\u{104b0}', '\u{0}', '\u{0}']), - ('\u{104d9}', ['\u{104b1}', '\u{0}', '\u{0}']), - ('\u{104da}', ['\u{104b2}', '\u{0}', '\u{0}']), - ('\u{104db}', ['\u{104b3}', '\u{0}', '\u{0}']), - ('\u{104dc}', ['\u{104b4}', '\u{0}', '\u{0}']), - ('\u{104dd}', ['\u{104b5}', '\u{0}', '\u{0}']), - ('\u{104de}', ['\u{104b6}', '\u{0}', '\u{0}']), - ('\u{104df}', ['\u{104b7}', '\u{0}', '\u{0}']), - ('\u{104e0}', ['\u{104b8}', '\u{0}', '\u{0}']), - ('\u{104e1}', ['\u{104b9}', '\u{0}', '\u{0}']), - ('\u{104e2}', ['\u{104ba}', '\u{0}', '\u{0}']), - ('\u{104e3}', ['\u{104bb}', '\u{0}', '\u{0}']), - ('\u{104e4}', ['\u{104bc}', '\u{0}', '\u{0}']), - ('\u{104e5}', ['\u{104bd}', '\u{0}', '\u{0}']), - ('\u{104e6}', ['\u{104be}', '\u{0}', '\u{0}']), - ('\u{104e7}', ['\u{104bf}', '\u{0}', '\u{0}']), - ('\u{104e8}', ['\u{104c0}', '\u{0}', '\u{0}']), - ('\u{104e9}', ['\u{104c1}', '\u{0}', '\u{0}']), - ('\u{104ea}', ['\u{104c2}', '\u{0}', '\u{0}']), - ('\u{104eb}', ['\u{104c3}', '\u{0}', '\u{0}']), - ('\u{104ec}', ['\u{104c4}', '\u{0}', '\u{0}']), - ('\u{104ed}', ['\u{104c5}', '\u{0}', '\u{0}']), - ('\u{104ee}', ['\u{104c6}', '\u{0}', '\u{0}']), - ('\u{104ef}', ['\u{104c7}', '\u{0}', '\u{0}']), - ('\u{104f0}', ['\u{104c8}', '\u{0}', '\u{0}']), - ('\u{104f1}', ['\u{104c9}', '\u{0}', '\u{0}']), - ('\u{104f2}', ['\u{104ca}', '\u{0}', '\u{0}']), - ('\u{104f3}', ['\u{104cb}', '\u{0}', '\u{0}']), - ('\u{104f4}', ['\u{104cc}', '\u{0}', '\u{0}']), - ('\u{104f5}', ['\u{104cd}', '\u{0}', '\u{0}']), - ('\u{104f6}', ['\u{104ce}', '\u{0}', '\u{0}']), - ('\u{104f7}', ['\u{104cf}', '\u{0}', '\u{0}']), - ('\u{104f8}', ['\u{104d0}', '\u{0}', '\u{0}']), - ('\u{104f9}', ['\u{104d1}', '\u{0}', '\u{0}']), - ('\u{104fa}', ['\u{104d2}', '\u{0}', '\u{0}']), - ('\u{104fb}', ['\u{104d3}', '\u{0}', '\u{0}']), - ('\u{10597}', ['\u{10570}', '\u{0}', '\u{0}']), - ('\u{10598}', ['\u{10571}', '\u{0}', 
'\u{0}']), - ('\u{10599}', ['\u{10572}', '\u{0}', '\u{0}']), - ('\u{1059a}', ['\u{10573}', '\u{0}', '\u{0}']), - ('\u{1059b}', ['\u{10574}', '\u{0}', '\u{0}']), - ('\u{1059c}', ['\u{10575}', '\u{0}', '\u{0}']), - ('\u{1059d}', ['\u{10576}', '\u{0}', '\u{0}']), - ('\u{1059e}', ['\u{10577}', '\u{0}', '\u{0}']), - ('\u{1059f}', ['\u{10578}', '\u{0}', '\u{0}']), - ('\u{105a0}', ['\u{10579}', '\u{0}', '\u{0}']), - ('\u{105a1}', ['\u{1057a}', '\u{0}', '\u{0}']), - ('\u{105a3}', ['\u{1057c}', '\u{0}', '\u{0}']), - ('\u{105a4}', ['\u{1057d}', '\u{0}', '\u{0}']), - ('\u{105a5}', ['\u{1057e}', '\u{0}', '\u{0}']), - ('\u{105a6}', ['\u{1057f}', '\u{0}', '\u{0}']), - ('\u{105a7}', ['\u{10580}', '\u{0}', '\u{0}']), - ('\u{105a8}', ['\u{10581}', '\u{0}', '\u{0}']), - ('\u{105a9}', ['\u{10582}', '\u{0}', '\u{0}']), - ('\u{105aa}', ['\u{10583}', '\u{0}', '\u{0}']), - ('\u{105ab}', ['\u{10584}', '\u{0}', '\u{0}']), - ('\u{105ac}', ['\u{10585}', '\u{0}', '\u{0}']), - ('\u{105ad}', ['\u{10586}', '\u{0}', '\u{0}']), - ('\u{105ae}', ['\u{10587}', '\u{0}', '\u{0}']), - ('\u{105af}', ['\u{10588}', '\u{0}', '\u{0}']), - ('\u{105b0}', ['\u{10589}', '\u{0}', '\u{0}']), - ('\u{105b1}', ['\u{1058a}', '\u{0}', '\u{0}']), - ('\u{105b3}', ['\u{1058c}', '\u{0}', '\u{0}']), - ('\u{105b4}', ['\u{1058d}', '\u{0}', '\u{0}']), - ('\u{105b5}', ['\u{1058e}', '\u{0}', '\u{0}']), - ('\u{105b6}', ['\u{1058f}', '\u{0}', '\u{0}']), - ('\u{105b7}', ['\u{10590}', '\u{0}', '\u{0}']), - ('\u{105b8}', ['\u{10591}', '\u{0}', '\u{0}']), - ('\u{105b9}', ['\u{10592}', '\u{0}', '\u{0}']), - ('\u{105bb}', ['\u{10594}', '\u{0}', '\u{0}']), - ('\u{105bc}', ['\u{10595}', '\u{0}', '\u{0}']), - ('\u{10cc0}', ['\u{10c80}', '\u{0}', '\u{0}']), - ('\u{10cc1}', ['\u{10c81}', '\u{0}', '\u{0}']), - ('\u{10cc2}', ['\u{10c82}', '\u{0}', '\u{0}']), - ('\u{10cc3}', ['\u{10c83}', '\u{0}', '\u{0}']), - ('\u{10cc4}', ['\u{10c84}', '\u{0}', '\u{0}']), - ('\u{10cc5}', ['\u{10c85}', '\u{0}', '\u{0}']), - ('\u{10cc6}', ['\u{10c86}', '\u{0}', '\u{0}']), - ('\u{10cc7}', ['\u{10c87}', '\u{0}', '\u{0}']), - ('\u{10cc8}', ['\u{10c88}', '\u{0}', '\u{0}']), - ('\u{10cc9}', ['\u{10c89}', '\u{0}', '\u{0}']), - ('\u{10cca}', ['\u{10c8a}', '\u{0}', '\u{0}']), - ('\u{10ccb}', ['\u{10c8b}', '\u{0}', '\u{0}']), - ('\u{10ccc}', ['\u{10c8c}', '\u{0}', '\u{0}']), - ('\u{10ccd}', ['\u{10c8d}', '\u{0}', '\u{0}']), - ('\u{10cce}', ['\u{10c8e}', '\u{0}', '\u{0}']), - ('\u{10ccf}', ['\u{10c8f}', '\u{0}', '\u{0}']), - ('\u{10cd0}', ['\u{10c90}', '\u{0}', '\u{0}']), - ('\u{10cd1}', ['\u{10c91}', '\u{0}', '\u{0}']), - ('\u{10cd2}', ['\u{10c92}', '\u{0}', '\u{0}']), - ('\u{10cd3}', ['\u{10c93}', '\u{0}', '\u{0}']), - ('\u{10cd4}', ['\u{10c94}', '\u{0}', '\u{0}']), - ('\u{10cd5}', ['\u{10c95}', '\u{0}', '\u{0}']), - ('\u{10cd6}', ['\u{10c96}', '\u{0}', '\u{0}']), - ('\u{10cd7}', ['\u{10c97}', '\u{0}', '\u{0}']), - ('\u{10cd8}', ['\u{10c98}', '\u{0}', '\u{0}']), - ('\u{10cd9}', ['\u{10c99}', '\u{0}', '\u{0}']), - ('\u{10cda}', ['\u{10c9a}', '\u{0}', '\u{0}']), - ('\u{10cdb}', ['\u{10c9b}', '\u{0}', '\u{0}']), - ('\u{10cdc}', ['\u{10c9c}', '\u{0}', '\u{0}']), - ('\u{10cdd}', ['\u{10c9d}', '\u{0}', '\u{0}']), - ('\u{10cde}', ['\u{10c9e}', '\u{0}', '\u{0}']), - ('\u{10cdf}', ['\u{10c9f}', '\u{0}', '\u{0}']), - ('\u{10ce0}', ['\u{10ca0}', '\u{0}', '\u{0}']), - ('\u{10ce1}', ['\u{10ca1}', '\u{0}', '\u{0}']), - ('\u{10ce2}', ['\u{10ca2}', '\u{0}', '\u{0}']), - ('\u{10ce3}', ['\u{10ca3}', '\u{0}', '\u{0}']), - ('\u{10ce4}', ['\u{10ca4}', '\u{0}', '\u{0}']), - ('\u{10ce5}', ['\u{10ca5}', '\u{0}', 
'\u{0}']), - ('\u{10ce6}', ['\u{10ca6}', '\u{0}', '\u{0}']), - ('\u{10ce7}', ['\u{10ca7}', '\u{0}', '\u{0}']), - ('\u{10ce8}', ['\u{10ca8}', '\u{0}', '\u{0}']), - ('\u{10ce9}', ['\u{10ca9}', '\u{0}', '\u{0}']), - ('\u{10cea}', ['\u{10caa}', '\u{0}', '\u{0}']), - ('\u{10ceb}', ['\u{10cab}', '\u{0}', '\u{0}']), - ('\u{10cec}', ['\u{10cac}', '\u{0}', '\u{0}']), - ('\u{10ced}', ['\u{10cad}', '\u{0}', '\u{0}']), - ('\u{10cee}', ['\u{10cae}', '\u{0}', '\u{0}']), - ('\u{10cef}', ['\u{10caf}', '\u{0}', '\u{0}']), - ('\u{10cf0}', ['\u{10cb0}', '\u{0}', '\u{0}']), - ('\u{10cf1}', ['\u{10cb1}', '\u{0}', '\u{0}']), - ('\u{10cf2}', ['\u{10cb2}', '\u{0}', '\u{0}']), - ('\u{10d70}', ['\u{10d50}', '\u{0}', '\u{0}']), - ('\u{10d71}', ['\u{10d51}', '\u{0}', '\u{0}']), - ('\u{10d72}', ['\u{10d52}', '\u{0}', '\u{0}']), - ('\u{10d73}', ['\u{10d53}', '\u{0}', '\u{0}']), - ('\u{10d74}', ['\u{10d54}', '\u{0}', '\u{0}']), - ('\u{10d75}', ['\u{10d55}', '\u{0}', '\u{0}']), - ('\u{10d76}', ['\u{10d56}', '\u{0}', '\u{0}']), - ('\u{10d77}', ['\u{10d57}', '\u{0}', '\u{0}']), - ('\u{10d78}', ['\u{10d58}', '\u{0}', '\u{0}']), - ('\u{10d79}', ['\u{10d59}', '\u{0}', '\u{0}']), - ('\u{10d7a}', ['\u{10d5a}', '\u{0}', '\u{0}']), - ('\u{10d7b}', ['\u{10d5b}', '\u{0}', '\u{0}']), - ('\u{10d7c}', ['\u{10d5c}', '\u{0}', '\u{0}']), - ('\u{10d7d}', ['\u{10d5d}', '\u{0}', '\u{0}']), - ('\u{10d7e}', ['\u{10d5e}', '\u{0}', '\u{0}']), - ('\u{10d7f}', ['\u{10d5f}', '\u{0}', '\u{0}']), - ('\u{10d80}', ['\u{10d60}', '\u{0}', '\u{0}']), - ('\u{10d81}', ['\u{10d61}', '\u{0}', '\u{0}']), - ('\u{10d82}', ['\u{10d62}', '\u{0}', '\u{0}']), - ('\u{10d83}', ['\u{10d63}', '\u{0}', '\u{0}']), - ('\u{10d84}', ['\u{10d64}', '\u{0}', '\u{0}']), - ('\u{10d85}', ['\u{10d65}', '\u{0}', '\u{0}']), - ('\u{118c0}', ['\u{118a0}', '\u{0}', '\u{0}']), - ('\u{118c1}', ['\u{118a1}', '\u{0}', '\u{0}']), - ('\u{118c2}', ['\u{118a2}', '\u{0}', '\u{0}']), - ('\u{118c3}', ['\u{118a3}', '\u{0}', '\u{0}']), - ('\u{118c4}', ['\u{118a4}', '\u{0}', '\u{0}']), - ('\u{118c5}', ['\u{118a5}', '\u{0}', '\u{0}']), - ('\u{118c6}', ['\u{118a6}', '\u{0}', '\u{0}']), - ('\u{118c7}', ['\u{118a7}', '\u{0}', '\u{0}']), - ('\u{118c8}', ['\u{118a8}', '\u{0}', '\u{0}']), - ('\u{118c9}', ['\u{118a9}', '\u{0}', '\u{0}']), - ('\u{118ca}', ['\u{118aa}', '\u{0}', '\u{0}']), - ('\u{118cb}', ['\u{118ab}', '\u{0}', '\u{0}']), - ('\u{118cc}', ['\u{118ac}', '\u{0}', '\u{0}']), - ('\u{118cd}', ['\u{118ad}', '\u{0}', '\u{0}']), - ('\u{118ce}', ['\u{118ae}', '\u{0}', '\u{0}']), - ('\u{118cf}', ['\u{118af}', '\u{0}', '\u{0}']), - ('\u{118d0}', ['\u{118b0}', '\u{0}', '\u{0}']), - ('\u{118d1}', ['\u{118b1}', '\u{0}', '\u{0}']), - ('\u{118d2}', ['\u{118b2}', '\u{0}', '\u{0}']), - ('\u{118d3}', ['\u{118b3}', '\u{0}', '\u{0}']), - ('\u{118d4}', ['\u{118b4}', '\u{0}', '\u{0}']), - ('\u{118d5}', ['\u{118b5}', '\u{0}', '\u{0}']), - ('\u{118d6}', ['\u{118b6}', '\u{0}', '\u{0}']), - ('\u{118d7}', ['\u{118b7}', '\u{0}', '\u{0}']), - ('\u{118d8}', ['\u{118b8}', '\u{0}', '\u{0}']), - ('\u{118d9}', ['\u{118b9}', '\u{0}', '\u{0}']), - ('\u{118da}', ['\u{118ba}', '\u{0}', '\u{0}']), - ('\u{118db}', ['\u{118bb}', '\u{0}', '\u{0}']), - ('\u{118dc}', ['\u{118bc}', '\u{0}', '\u{0}']), - ('\u{118dd}', ['\u{118bd}', '\u{0}', '\u{0}']), - ('\u{118de}', ['\u{118be}', '\u{0}', '\u{0}']), - ('\u{118df}', ['\u{118bf}', '\u{0}', '\u{0}']), - ('\u{16e60}', ['\u{16e40}', '\u{0}', '\u{0}']), - ('\u{16e61}', ['\u{16e41}', '\u{0}', '\u{0}']), - ('\u{16e62}', ['\u{16e42}', '\u{0}', '\u{0}']), - ('\u{16e63}', ['\u{16e43}', '\u{0}', 
'\u{0}']), - ('\u{16e64}', ['\u{16e44}', '\u{0}', '\u{0}']), - ('\u{16e65}', ['\u{16e45}', '\u{0}', '\u{0}']), - ('\u{16e66}', ['\u{16e46}', '\u{0}', '\u{0}']), - ('\u{16e67}', ['\u{16e47}', '\u{0}', '\u{0}']), - ('\u{16e68}', ['\u{16e48}', '\u{0}', '\u{0}']), - ('\u{16e69}', ['\u{16e49}', '\u{0}', '\u{0}']), - ('\u{16e6a}', ['\u{16e4a}', '\u{0}', '\u{0}']), - ('\u{16e6b}', ['\u{16e4b}', '\u{0}', '\u{0}']), - ('\u{16e6c}', ['\u{16e4c}', '\u{0}', '\u{0}']), - ('\u{16e6d}', ['\u{16e4d}', '\u{0}', '\u{0}']), - ('\u{16e6e}', ['\u{16e4e}', '\u{0}', '\u{0}']), - ('\u{16e6f}', ['\u{16e4f}', '\u{0}', '\u{0}']), - ('\u{16e70}', ['\u{16e50}', '\u{0}', '\u{0}']), - ('\u{16e71}', ['\u{16e51}', '\u{0}', '\u{0}']), - ('\u{16e72}', ['\u{16e52}', '\u{0}', '\u{0}']), - ('\u{16e73}', ['\u{16e53}', '\u{0}', '\u{0}']), - ('\u{16e74}', ['\u{16e54}', '\u{0}', '\u{0}']), - ('\u{16e75}', ['\u{16e55}', '\u{0}', '\u{0}']), - ('\u{16e76}', ['\u{16e56}', '\u{0}', '\u{0}']), - ('\u{16e77}', ['\u{16e57}', '\u{0}', '\u{0}']), - ('\u{16e78}', ['\u{16e58}', '\u{0}', '\u{0}']), - ('\u{16e79}', ['\u{16e59}', '\u{0}', '\u{0}']), - ('\u{16e7a}', ['\u{16e5a}', '\u{0}', '\u{0}']), - ('\u{16e7b}', ['\u{16e5b}', '\u{0}', '\u{0}']), - ('\u{16e7c}', ['\u{16e5c}', '\u{0}', '\u{0}']), - ('\u{16e7d}', ['\u{16e5d}', '\u{0}', '\u{0}']), - ('\u{16e7e}', ['\u{16e5e}', '\u{0}', '\u{0}']), - ('\u{16e7f}', ['\u{16e5f}', '\u{0}', '\u{0}']), - ('\u{16ebb}', ['\u{16ea0}', '\u{0}', '\u{0}']), - ('\u{16ebc}', ['\u{16ea1}', '\u{0}', '\u{0}']), - ('\u{16ebd}', ['\u{16ea2}', '\u{0}', '\u{0}']), - ('\u{16ebe}', ['\u{16ea3}', '\u{0}', '\u{0}']), - ('\u{16ebf}', ['\u{16ea4}', '\u{0}', '\u{0}']), - ('\u{16ec0}', ['\u{16ea5}', '\u{0}', '\u{0}']), - ('\u{16ec1}', ['\u{16ea6}', '\u{0}', '\u{0}']), - ('\u{16ec2}', ['\u{16ea7}', '\u{0}', '\u{0}']), - ('\u{16ec3}', ['\u{16ea8}', '\u{0}', '\u{0}']), - ('\u{16ec4}', ['\u{16ea9}', '\u{0}', '\u{0}']), - ('\u{16ec5}', ['\u{16eaa}', '\u{0}', '\u{0}']), - ('\u{16ec6}', ['\u{16eab}', '\u{0}', '\u{0}']), - ('\u{16ec7}', ['\u{16eac}', '\u{0}', '\u{0}']), - ('\u{16ec8}', ['\u{16ead}', '\u{0}', '\u{0}']), - ('\u{16ec9}', ['\u{16eae}', '\u{0}', '\u{0}']), - ('\u{16eca}', ['\u{16eaf}', '\u{0}', '\u{0}']), - ('\u{16ecb}', ['\u{16eb0}', '\u{0}', '\u{0}']), - ('\u{16ecc}', ['\u{16eb1}', '\u{0}', '\u{0}']), - ('\u{16ecd}', ['\u{16eb2}', '\u{0}', '\u{0}']), - ('\u{16ece}', ['\u{16eb3}', '\u{0}', '\u{0}']), - ('\u{16ecf}', ['\u{16eb4}', '\u{0}', '\u{0}']), - ('\u{16ed0}', ['\u{16eb5}', '\u{0}', '\u{0}']), - ('\u{16ed1}', ['\u{16eb6}', '\u{0}', '\u{0}']), - ('\u{16ed2}', ['\u{16eb7}', '\u{0}', '\u{0}']), - ('\u{16ed3}', ['\u{16eb8}', '\u{0}', '\u{0}']), - ('\u{1e922}', ['\u{1e900}', '\u{0}', '\u{0}']), - ('\u{1e923}', ['\u{1e901}', '\u{0}', '\u{0}']), - ('\u{1e924}', ['\u{1e902}', '\u{0}', '\u{0}']), - ('\u{1e925}', ['\u{1e903}', '\u{0}', '\u{0}']), - ('\u{1e926}', ['\u{1e904}', '\u{0}', '\u{0}']), - ('\u{1e927}', ['\u{1e905}', '\u{0}', '\u{0}']), - ('\u{1e928}', ['\u{1e906}', '\u{0}', '\u{0}']), - ('\u{1e929}', ['\u{1e907}', '\u{0}', '\u{0}']), - ('\u{1e92a}', ['\u{1e908}', '\u{0}', '\u{0}']), - ('\u{1e92b}', ['\u{1e909}', '\u{0}', '\u{0}']), - ('\u{1e92c}', ['\u{1e90a}', '\u{0}', '\u{0}']), - ('\u{1e92d}', ['\u{1e90b}', '\u{0}', '\u{0}']), - ('\u{1e92e}', ['\u{1e90c}', '\u{0}', '\u{0}']), - ('\u{1e92f}', ['\u{1e90d}', '\u{0}', '\u{0}']), - ('\u{1e930}', ['\u{1e90e}', '\u{0}', '\u{0}']), - ('\u{1e931}', ['\u{1e90f}', '\u{0}', '\u{0}']), - ('\u{1e932}', ['\u{1e910}', '\u{0}', '\u{0}']), - ('\u{1e933}', ['\u{1e911}', '\u{0}', 
'\u{0}']),
-    ('\u{1e934}', ['\u{1e912}', '\u{0}', '\u{0}']),
-    ('\u{1e935}', ['\u{1e913}', '\u{0}', '\u{0}']),
-    ('\u{1e936}', ['\u{1e914}', '\u{0}', '\u{0}']),
-    ('\u{1e937}', ['\u{1e915}', '\u{0}', '\u{0}']),
-    ('\u{1e938}', ['\u{1e916}', '\u{0}', '\u{0}']),
-    ('\u{1e939}', ['\u{1e917}', '\u{0}', '\u{0}']),
-    ('\u{1e93a}', ['\u{1e918}', '\u{0}', '\u{0}']),
-    ('\u{1e93b}', ['\u{1e919}', '\u{0}', '\u{0}']),
-    ('\u{1e93c}', ['\u{1e91a}', '\u{0}', '\u{0}']),
-    ('\u{1e93d}', ['\u{1e91b}', '\u{0}', '\u{0}']),
-    ('\u{1e93e}', ['\u{1e91c}', '\u{0}', '\u{0}']),
-    ('\u{1e93f}', ['\u{1e91d}', '\u{0}', '\u{0}']),
-    ('\u{1e940}', ['\u{1e91e}', '\u{0}', '\u{0}']),
-    ('\u{1e941}', ['\u{1e91f}', '\u{0}', '\u{0}']),
-    ('\u{1e942}', ['\u{1e920}', '\u{0}', '\u{0}']),
-    ('\u{1e943}', ['\u{1e921}', '\u{0}', '\u{0}']),
-];

From abc7e749f2f62645bbbbb26ff96d3432321ac238 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Wed, 8 Oct 2025 08:12:09 +0530
Subject: [PATCH 265/358] Add alignment parameter to `simd_masked_{load,store}`

---
 core/src/intrinsics/simd.rs                  | 25 +++++++++++++++-----
 portable-simd/crates/core_simd/src/vector.rs | 18 ++++++++++++--
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/core/src/intrinsics/simd.rs b/core/src/intrinsics/simd.rs
index 19488082cc33d..c56e04bfc2d90 100644
--- a/core/src/intrinsics/simd.rs
+++ b/core/src/intrinsics/simd.rs
@@ -2,6 +2,8 @@
 //!
 //! In this module, a "vector" is any `repr(simd)` type.
 
+use crate::marker::ConstParamTy;
+
 /// Inserts an element into a vector, returning the updated vector.
 ///
 /// `T` must be a vector with element type `U`, and `idx` must be `const`.
@@ -377,6 +379,19 @@ pub unsafe fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
 #[rustc_nounwind]
 pub unsafe fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
 
+/// A type for alignment options for SIMD masked load/store intrinsics.
+#[derive(Debug, ConstParamTy, PartialEq, Eq)]
+pub enum SimdAlign {
+    // These values must match the compiler's `SimdAlign` defined in
+    // `rustc_middle/src/ty/consts/int.rs`!
+    /// No alignment requirements on the pointer
+    Unaligned = 0,
+    /// The pointer must be aligned to the element type of the SIMD vector
+    Element = 1,
+    /// The pointer must be aligned to the SIMD vector type
+    Vector = 2,
+}
+
 /// Reads a vector of pointers.
 ///
 /// `T` must be a vector.
@@ -392,13 +407,12 @@ pub unsafe fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
 /// `val`.
 ///
 /// # Safety
-/// Unmasked values in `T` must be readable as if by `<ptr>::read` (e.g. aligned to the element
-/// type).
+/// `ptr` must be aligned according to the `ALIGN` parameter, see [`SimdAlign`] for details.
 ///
 /// `mask` must only contain `0` or `!0` values.
 #[rustc_intrinsic]
 #[rustc_nounwind]
-pub unsafe fn simd_masked_load<V, U, T>(mask: V, ptr: U, val: T) -> T;
+pub unsafe fn simd_masked_load<V, U, T, const ALIGN: SimdAlign>(mask: V, ptr: U, val: T) -> T;
 
 /// Writes to a vector of pointers.
 ///
@@ -414,13 +428,12 @@ pub unsafe fn simd_masked_load<V, U, T>(mask: V, ptr: U, val: T) -> T;
 /// Otherwise if the corresponding value in `mask` is `0`, do nothing.
 ///
 /// # Safety
-/// Unmasked values in `T` must be writeable as if by `<ptr>::write` (e.g. aligned to the element
-/// type).
+/// `ptr` must be aligned according to the `ALIGN` parameter, see [`SimdAlign`] for details.
 ///
 /// `mask` must only contain `0` or `!0` values.
 #[rustc_intrinsic]
 #[rustc_nounwind]
-pub unsafe fn simd_masked_store<V, U, T>(mask: V, ptr: U, val: T);
+pub unsafe fn simd_masked_store<V, U, T, const ALIGN: SimdAlign>(mask: V, ptr: U, val: T);
 
 /// Adds two simd vectors elementwise, with saturation.
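As a sketch of what the new `ALIGN` const parameter buys at the safe API level (assuming a nightly toolchain with `portable_simd`, and assuming the wrapper updated in the next hunk is `Simd::load_select_ptr`, the load counterpart of the `store_select_ptr` shown there): the in-tree callers pick `SimdAlign::Element`, so the pointer only has to be aligned like a single element, not like the whole vector.

```
#![feature(portable_simd)]
use std::simd::{Mask, Simd};

fn main() {
    // Only three readable elements; lane 3 is masked off, so the masked load
    // must not touch the (nonexistent) fourth element.
    let data = [1.0f32, 2.0, 3.0];
    let enable = Mask::from_array([true, true, true, false]);
    let or = Simd::splat(0.0f32);

    // Lowers to `simd_masked_load` with `SimdAlign::Element`: `data.as_ptr()`
    // must be aligned for `f32`, but not for the whole `Simd<f32, 4>`.
    let v = unsafe { Simd::<f32, 4>::load_select_ptr(data.as_ptr(), enable, or) };
    assert_eq!(v.to_array(), [1.0, 2.0, 3.0, 0.0]);
}
```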
 ///
diff --git a/portable-simd/crates/core_simd/src/vector.rs b/portable-simd/crates/core_simd/src/vector.rs
index d76a6cd52bfc5..f40031f8c4da7 100644
--- a/portable-simd/crates/core_simd/src/vector.rs
+++ b/portable-simd/crates/core_simd/src/vector.rs
@@ -474,7 +474,14 @@ where
         or: Self,
     ) -> Self {
         // SAFETY: The safety of reading elements through `ptr` is ensured by the caller.
-        unsafe { core::intrinsics::simd::simd_masked_load(enable.to_int(), ptr, or) }
+        unsafe {
+            core::intrinsics::simd::simd_masked_load::<
+                _,
+                _,
+                _,
+                { core::intrinsics::simd::SimdAlign::Element },
+            >(enable.to_int(), ptr, or)
+        }
     }
 
     /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
@@ -723,7 +730,14 @@ where
     #[inline]
     pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<<T as SimdElement>::Mask, N>) {
         // SAFETY: The safety of writing elements through `ptr` is ensured by the caller.
-        unsafe { core::intrinsics::simd::simd_masked_store(enable.to_int(), ptr, self) }
+        unsafe {
+            core::intrinsics::simd::simd_masked_store::<
+                _,
+                _,
+                _,
+                { core::intrinsics::simd::SimdAlign::Element },
+            >(enable.to_int(), ptr, self)
+        }
     }
 
     /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.

From d1d649f310b2a20a5b4f980456cbc1f8a1119670 Mon Sep 17 00:00:00 2001
From: David Tolnay
Date: Mon, 3 Nov 2025 17:17:14 -0800
Subject: [PATCH 266/358] Repoint Waker::from_fn_ptr from feature request issue to tracking issue

---
 core/src/task/wake.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/src/task/wake.rs b/core/src/task/wake.rs
index 178717fe42eac..480b3c4577eb6 100644
--- a/core/src/task/wake.rs
+++ b/core/src/task/wake.rs
@@ -588,7 +588,7 @@ impl Waker {
     /// Constructs a `Waker` from a function pointer.
     #[inline]
     #[must_use]
-    #[unstable(feature = "waker_from_fn_ptr", issue = "146055")]
+    #[unstable(feature = "waker_from_fn_ptr", issue = "148457")]
     pub const fn from_fn_ptr(f: fn()) -> Self {
         // SAFETY: Unsafe is used for transmutes, pointer came from `fn()` so it
         // is sound to transmute it back to `fn()`.
@@ -905,7 +905,7 @@ impl LocalWaker {
     /// Constructs a `LocalWaker` from a function pointer.
     #[inline]
     #[must_use]
-    #[unstable(feature = "waker_from_fn_ptr", issue = "146055")]
+    #[unstable(feature = "waker_from_fn_ptr", issue = "148457")]
     pub const fn from_fn_ptr(f: fn()) -> Self {
         // SAFETY: Unsafe is used for transmutes, pointer came from `fn()` so it
         // is sound to transmute it back to `fn()`.

From c7875a07659837009b3b93c056bceaf2397ad637 Mon Sep 17 00:00:00 2001
From: Folkert de Vries
Date: Wed, 24 Sep 2025 14:09:56 +0200
Subject: [PATCH 267/358] assert that `#[rustc_pass_indirectly_in_non_rustic_abis]` is respected

---
 core/src/ffi/va_list.rs | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/core/src/ffi/va_list.rs b/core/src/ffi/va_list.rs
index 46ccf330d1c22..3c9587d383e30 100644
--- a/core/src/ffi/va_list.rs
+++ b/core/src/ffi/va_list.rs
@@ -299,3 +299,15 @@ impl<'f> Drop for VaListImpl<'f> {
         // This works for now, since `va_end` is a no-op on all current LLVM targets.
     }
 }
+
+// Checks (via an assert in `compiler/rustc_ty_utils/src/abi.rs`) that the C ABI for the current
+// target correctly implements `rustc_pass_indirectly_in_non_rustic_abis`.
+const _: () = {
+    #[repr(C)]
+    #[rustc_pass_indirectly_in_non_rustic_abis]
+    struct Type(usize);
+
+    const extern "C" fn c(_: Type) {}
+
+    c(Type(0))
+};

From c48b3f2f8fe4bfc60cb623a995e1ea034b2da535 Mon Sep 17 00:00:00 2001
From: Marijn Schouten
Date: Fri, 17 Oct 2025 12:10:51 +0000
Subject: [PATCH 268/358] btree: cleanup difference, intersection, is_subset

---
 alloc/src/collections/btree/set.rs | 179 ++++++++++++++---------------
 alloctests/Cargo.toml              |   2 +-
 2 files changed, 87 insertions(+), 94 deletions(-)

diff --git a/alloc/src/collections/btree/set.rs b/alloc/src/collections/btree/set.rs
index 6e6996bcbd69b..cb3e14252f8a3 100644
--- a/alloc/src/collections/btree/set.rs
+++ b/alloc/src/collections/btree/set.rs
@@ -427,39 +427,35 @@ impl<T, A: Allocator + Clone> BTreeSet<T, A> {
     where
         T: Ord,
     {
-        let (self_min, self_max) =
-            if let (Some(self_min), Some(self_max)) = (self.first(), self.last()) {
-                (self_min, self_max)
-            } else {
-                return Difference { inner: DifferenceInner::Iterate(self.iter()) };
-            };
-        let (other_min, other_max) =
-            if let (Some(other_min), Some(other_max)) = (other.first(), other.last()) {
-                (other_min, other_max)
-            } else {
-                return Difference { inner: DifferenceInner::Iterate(self.iter()) };
-            };
-        Difference {
-            inner: match (self_min.cmp(other_max), self_max.cmp(other_min)) {
-                (Greater, _) | (_, Less) => DifferenceInner::Iterate(self.iter()),
-                (Equal, _) => {
-                    let mut self_iter = self.iter();
-                    self_iter.next();
-                    DifferenceInner::Iterate(self_iter)
-                }
-                (_, Equal) => {
-                    let mut self_iter = self.iter();
-                    self_iter.next_back();
-                    DifferenceInner::Iterate(self_iter)
-                }
-                _ if self.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF => {
-                    DifferenceInner::Search { self_iter: self.iter(), other_set: other }
-                }
-                _ => DifferenceInner::Stitch {
-                    self_iter: self.iter(),
-                    other_iter: other.iter().peekable(),
+        if let Some(self_min) = self.first()
+            && let Some(self_max) = self.last()
+            && let Some(other_min) = other.first()
+            && let Some(other_max) = other.last()
+        {
+            Difference {
+                inner: match (self_min.cmp(other_max), self_max.cmp(other_min)) {
+                    (Greater, _) | (_, Less) => DifferenceInner::Iterate(self.iter()),
+                    (Equal, _) => {
+                        let mut self_iter = self.iter();
+                        self_iter.next();
+                        DifferenceInner::Iterate(self_iter)
+                    }
+                    (_, Equal) => {
+                        let mut self_iter = self.iter();
+                        self_iter.next_back();
+                        DifferenceInner::Iterate(self_iter)
+                    }
+                    _ if self.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF => {
+                        DifferenceInner::Search { self_iter: self.iter(), other_set: other }
+                    }
+                    _ => DifferenceInner::Stitch {
+                        self_iter: self.iter(),
+                        other_iter: other.iter().peekable(),
+                    },
                 },
-            },
+            }
+        } else {
+            Difference { inner: DifferenceInner::Iterate(self.iter()) }
         }
     }
 
@@ -519,31 +515,27 @@ impl<T, A: Allocator + Clone> BTreeSet<T, A> {
     where
         T: Ord,
     {
-        let (self_min, self_max) =
-            if let (Some(self_min), Some(self_max)) = (self.first(), self.last()) {
-                (self_min, self_max)
-            } else {
-                return Intersection { inner: IntersectionInner::Answer(None) };
-            };
-        let (other_min, other_max) =
-            if let (Some(other_min), Some(other_max)) = (other.first(), other.last()) {
-                (other_min, other_max)
-            } else {
-                return Intersection { inner: IntersectionInner::Answer(None) };
-            };
-        Intersection {
-            inner: match (self_min.cmp(other_max), self_max.cmp(other_min)) {
-                (Greater, _) | (_, Less) => IntersectionInner::Answer(None),
-                (Equal, _) => IntersectionInner::Answer(Some(self_min)),
-                (_, Equal) => IntersectionInner::Answer(Some(self_max)),
-                _ if self.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF => {
-                    IntersectionInner::Search { small_iter: self.iter(), large_set: other }
-                }
-                _ if other.len() <= self.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF => {
-                    IntersectionInner::Search { small_iter: other.iter(), large_set: self }
-                }
-                _ => IntersectionInner::Stitch { a: self.iter(), b: other.iter() },
-            },
+        if let Some(self_min) = self.first()
+            && let Some(self_max) = self.last()
+            && let Some(other_min) = other.first()
+            && let Some(other_max) = other.last()
+        {
+            Intersection {
+                inner: match (self_min.cmp(other_max), self_max.cmp(other_min)) {
+                    (Greater, _) | (_, Less) => IntersectionInner::Answer(None),
+                    (Equal, _) => IntersectionInner::Answer(Some(self_min)),
+                    (_, Equal) => IntersectionInner::Answer(Some(self_max)),
+                    _ if self.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF => {
+                        IntersectionInner::Search { small_iter: self.iter(), large_set: other }
+                    }
+                    _ if other.len() <= self.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF => {
+                        IntersectionInner::Search { small_iter: other.iter(), large_set: self }
+                    }
+                    _ => IntersectionInner::Stitch { a: self.iter(), b: other.iter() },
+                },
+            }
+        } else {
+            Intersection { inner: IntersectionInner::Answer(None) }
         }
     }
 
@@ -694,55 +686,56 @@ impl<T, A: Allocator + Clone> BTreeSet<T, A> {
         // Same result as self.difference(other).next().is_none()
         // but the code below is faster (hugely in some cases).
         if self.len() > other.len() {
-            return false;
+            return false; // self has more elements than other
         }
-        let (self_min, self_max) =
-            if let (Some(self_min), Some(self_max)) = (self.first(), self.last()) {
-                (self_min, self_max)
-            } else {
-                return true; // self is empty
-            };
-        let (other_min, other_max) =
-            if let (Some(other_min), Some(other_max)) = (other.first(), other.last()) {
-                (other_min, other_max)
-            } else {
-                return false; // other is empty
-            };
+        let (Some(self_min), Some(self_max)) = (self.first(), self.last()) else {
+            return true; // self is empty
+        };
+        let (Some(other_min), Some(other_max)) = (other.first(), other.last()) else {
+            return false; // other is empty
+        };
         let mut self_iter = self.iter();
         match self_min.cmp(other_min) {
-            Less => return false,
+            Less => return false, // other does not contain self_min
             Equal => {
-                self_iter.next();
+                self_iter.next(); // self_min is contained in other, so remove it from consideration
+                // other_min is now not in self_iter (used below)
            }
-            Greater => (),
-        }
+            Greater => {} // other_min is not in self_iter (used below)
+        };
+
         match self_max.cmp(other_max) {
-            Greater => return false,
+            Greater => return false, // other does not contain self_max
             Equal => {
-                self_iter.next_back();
+                self_iter.next_back(); // self_max is contained in other, so remove it from consideration
+                // other_max is now not in self_iter (used below)
             }
-            Less => (),
-        }
+            Less => {} // other_max is not in self_iter (used below)
+        };
+
         if self_iter.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
-            for next in self_iter {
-                if !other.contains(next) {
-                    return false;
-                }
-            }
+            self_iter.all(|e| other.contains(e))
         } else {
             let mut other_iter = other.iter();
-            other_iter.next();
-            other_iter.next_back();
-            let mut self_next = self_iter.next();
-            while let Some(self1) = self_next {
-                match other_iter.next().map_or(Less, |other1| self1.cmp(other1)) {
-                    Less => return false,
-                    Equal => self_next = self_iter.next(),
-                    Greater => (),
-                }
+            {
+                // remove other_min and other_max as they are not in self_iter (see above)
+                other_iter.next();
+                other_iter.next_back();
             }
+            // custom `self_iter.all(|e| other.contains(e))`
+            self_iter.all(|self1| {
+                while let Some(other1) = other_iter.next() {
+                    match other1.cmp(self1) {
+                        // happens up to `ITER_PERFORMANCE_TIPPING_SIZE_DIFF * self.len() - 1` times
+                        Less => continue, // skip over elements that are smaller
+                        // happens `self.len()` times
+                        Equal => return true, // self1 is in other
+                        // happens only once
+                        Greater => return false, // self1 is not in other
+                    }
+                }
+                false
+            })
         }
-        true
     }
 
     /// Returns `true` if the set is a superset of another,
diff --git a/alloctests/Cargo.toml b/alloctests/Cargo.toml
index 07c45d1b82484..3b522bf80a217 100644
--- a/alloctests/Cargo.toml
+++ b/alloctests/Cargo.toml
@@ -6,7 +6,7 @@ repository = "https://github.com/rust-lang/rust.git"
 description = "Tests for the Rust Allocation Library"
 autotests = false
 autobenches = false
-edition = "2021"
+edition = "2024"
 
 [lib]
 path = "lib.rs"

From 0cc927e04c9c850caae35064d319d30e49b364be Mon Sep 17 00:00:00 2001
From: Chris Denton
Date: Tue, 4 Nov 2025 19:14:47 +0000
Subject: [PATCH 269/358] Implement Path::is_empty

---
 std/src/path.rs | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/std/src/path.rs b/std/src/path.rs
index 6e3b1e6e47d0e..1ae36a71270aa 100644
--- a/std/src/path.rs
+++ b/std/src/path.rs
@@ -2756,6 +2756,28 @@ impl Path {
         iter_after(self.components().rev(), child.components().rev()).is_some()
     }
 
+    /// Checks whether the `Path` is empty.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(path_is_empty)]
+    /// use std::path::Path;
+    ///
+    /// let path = Path::new("");
+    /// assert!(path.is_empty());
+    ///
+    /// let path = Path::new("foo");
+    /// assert!(!path.is_empty());
+    ///
+    /// let path = Path::new(".");
+    /// assert!(!path.is_empty());
+    /// ```
+    #[unstable(feature = "path_is_empty", issue = "148494")]
+    pub fn is_empty(&self) -> bool {
+        self.as_os_str().is_empty()
+    }
+
     /// Extracts the stem (non-extension) portion of [`self.file_name`].
     ///
     /// [`self.file_name`]: Path::file_name

From 4ed4aa0e397746e80bb2d3527952e9b80f594a04 Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Thu, 30 Oct 2025 15:04:49 +0000
Subject: [PATCH 270/358] Remove no longer necessary lint allow

---
 panic_unwind/src/lib.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/panic_unwind/src/lib.rs b/panic_unwind/src/lib.rs
index 83311f3238012..1be19913f260f 100644
--- a/panic_unwind/src/lib.rs
+++ b/panic_unwind/src/lib.rs
@@ -24,8 +24,6 @@
 #![feature(rustc_attrs)]
 #![panic_runtime]
 #![feature(panic_runtime)]
-// `real_imp` is unused with Miri, so silence warnings.
-#![cfg_attr(miri, allow(dead_code))]
 #![allow(internal_features)]
 #![warn(unreachable_pub)]
 #![deny(unsafe_op_in_unsafe_fn)]

From e5eb3762caa910316489d4c7f261257d8094d571 Mon Sep 17 00:00:00 2001
From: Nitai Sasson
Date: Tue, 4 Nov 2025 23:51:22 +0200
Subject: [PATCH 271/358] Fix link in c_longlong documentation

---
 core/src/ffi/c_longlong.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/ffi/c_longlong.md b/core/src/ffi/c_longlong.md
index 49c61bd61f4ad..234ab344409da 100644
--- a/core/src/ffi/c_longlong.md
+++ b/core/src/ffi/c_longlong.md
@@ -2,4 +2,4 @@ Equivalent to C's `signed long long` (`long long`) type.
 
 This type will almost always be [`i64`], but may differ on some systems. The C standard technically only requires that this type be a signed integer that is at least 64 bits and at least the size of a [`long`], although in practice, no system would have a `long long` that is not an `i64`, as most systems do not have a standardised [`i128`] type.
 
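(A quick, illustrative sanity check of the width claims in the paragraph above — not part of the patch. The relationships can be asserted directly against the `std::ffi` aliases on any host:)

```
use std::ffi::{c_long, c_longlong};
use std::mem::size_of;

fn main() {
    // `long long` is at least 64 bits, and in practice exactly an `i64`.
    assert_eq!(size_of::<c_longlong>() * 8, 64);
    // ...and it is at least as wide as `long` (32 or 64 bits by target).
    assert!(size_of::<c_longlong>() >= size_of::<c_long>());
}
```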
-[`long`]: c_int
+[`long`]: c_long

From 178ea41bb6944fe6ae829e08e42dc13c99776af7 Mon Sep 17 00:00:00 2001
From: Waffle Lapkin
Date: Wed, 5 Nov 2025 17:01:21 +0100
Subject: [PATCH 272/358] Merge `Vec::push{,_mut}_within_capacity`

---
 alloc/src/vec/mod.rs | 50 ++++++++++++++------------------------------
 1 file changed, 16 insertions(+), 34 deletions(-)

diff --git a/alloc/src/vec/mod.rs b/alloc/src/vec/mod.rs
index 45d6c28e186e4..dc610d7b46741 100644
--- a/alloc/src/vec/mod.rs
+++ b/alloc/src/vec/mod.rs
@@ -2564,8 +2564,8 @@ impl<T, A: Allocator> Vec<T, A> {
         let _ = self.push_mut(value);
     }
 
-    /// Appends an element if there is sufficient spare capacity, otherwise an error is returned
-    /// with the element.
+    /// Appends an element and returns a reference to it if there is sufficient spare capacity,
+    /// otherwise an error is returned with the element.
     ///
     /// Unlike [`push`] this method will not reallocate when there's insufficient capacity.
     /// The caller should use [`reserve`] or [`try_reserve`] to ensure that there is enough capacity.
@@ -2601,8 +2601,20 @@
     /// Takes *O*(1) time.
     #[inline]
     #[unstable(feature = "vec_push_within_capacity", issue = "100486")]
-    pub fn push_within_capacity(&mut self, value: T) -> Result<(), T> {
-        self.push_mut_within_capacity(value).map(|_| ())
+    // #[unstable(feature = "push_mut", issue = "135974")]
+    pub fn push_within_capacity(&mut self, value: T) -> Result<&mut T, T> {
+        if self.len == self.buf.capacity() {
+            return Err(value);
+        }
+
+        unsafe {
+            let end = self.as_mut_ptr().add(self.len);
+            ptr::write(end, value);
+            self.len += 1;
+
+            // SAFETY: We just wrote a value to the pointer that will live the lifetime of the reference.
+            Ok(&mut *end)
+        }
     }
 
     /// Appends an element to the back of a collection, returning a reference to it.
@@ -2654,36 +2666,6 @@ impl<T, A: Allocator> Vec<T, A> {
         }
     }
 
-    /// Appends an element and returns a reference to it if there is sufficient spare capacity,
-    /// otherwise an error is returned with the element.
-    ///
-    /// Unlike [`push_mut`] this method will not reallocate when there's insufficient capacity.
-    /// The caller should use [`reserve`] or [`try_reserve`] to ensure that there is enough capacity.
-    ///
-    /// [`push_mut`]: Vec::push_mut
-    /// [`reserve`]: Vec::reserve
-    /// [`try_reserve`]: Vec::try_reserve
-    ///
-    /// # Time complexity
-    ///
-    /// Takes *O*(1) time.
-    #[unstable(feature = "push_mut", issue = "135974")]
-    // #[unstable(feature = "vec_push_within_capacity", issue = "100486")]
-    #[inline]
-    #[must_use = "if you don't need a reference to the value, use `Vec::push_within_capacity` instead"]
-    pub fn push_mut_within_capacity(&mut self, value: T) -> Result<&mut T, T> {
-        if self.len == self.buf.capacity() {
-            return Err(value);
-        }
-        unsafe {
-            let end = self.as_mut_ptr().add(self.len);
-            ptr::write(end, value);
-            self.len += 1;
-            // SAFETY: We just wrote a value to the pointer that will live the lifetime of the reference.
-            Ok(&mut *end)
-        }
-    }
-
     /// Removes the last element from a vector and returns it, or [`None`] if it
     /// is empty.
     ///

From e7a3548f76279512e57097572b054330b7f48a1e Mon Sep 17 00:00:00 2001
From: Taylor Cramer
Date: Wed, 5 Nov 2025 10:19:49 -0800
Subject: [PATCH 273/358] Add Allocator proxy impls for Box, Rc, and Arc

This adds to the existing proxy impl for &T.
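A minimal sketch of what these proxy impls enable, assuming a nightly toolchain with `allocator_api`; the `PassThrough` allocator is a stand-in invented for this example. Once an `Rc` of an allocator is itself an `Allocator`, clones of one handle can parameterize several collections over a single allocator value:

```
#![feature(allocator_api)]
use std::alloc::{AllocError, Allocator, Global, Layout};
use std::ptr::NonNull;
use std::rc::Rc;

// Stand-in allocator that simply forwards to `Global`.
struct PassThrough;

unsafe impl Allocator for PassThrough {
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        Global.allocate(layout)
    }
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        // SAFETY: `ptr` was handed out by `Global` in `allocate` above.
        unsafe { Global.deallocate(ptr, layout) }
    }
}

fn main() {
    let shared = Rc::new(PassThrough);
    // `Rc<PassThrough>` is an `Allocator` via the proxy impl, so both vectors
    // share the one `PassThrough` value behind the refcounted handle.
    let mut a: Vec<u8, _> = Vec::new_in(shared.clone());
    let mut b: Vec<u8, _> = Vec::new_in(shared);
    a.push(1);
    b.push(2);
    assert_eq!((a[0], b[0]), (1, 2));
}
```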
---
 alloc/src/boxed.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 alloc/src/rc.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 alloc/src/sync.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+)

diff --git a/alloc/src/boxed.rs b/alloc/src/boxed.rs
index 7ad1679b1c822..39b17514a699c 100644
--- a/alloc/src/boxed.rs
+++ b/alloc/src/boxed.rs
@@ -2241,3 +2241,55 @@ impl<E: Error> Error for Box<E> {
         Error::provide(&**self, request);
     }
 }
+
+#[unstable(feature = "allocator_api", issue = "32838")]
+unsafe impl<T: Allocator + ?Sized, A: Allocator> Allocator for Box<T, A> {
+    #[inline]
+    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        (**self).allocate(layout)
+    }
+
+    #[inline]
+    fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        (**self).allocate_zeroed(layout)
+    }
+
+    #[inline]
+    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).deallocate(ptr, layout) }
+    }
+
+    #[inline]
+    unsafe fn grow(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).grow(ptr, old_layout, new_layout) }
+    }
+
+    #[inline]
+    unsafe fn grow_zeroed(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).grow_zeroed(ptr, old_layout, new_layout) }
+    }
+
+    #[inline]
+    unsafe fn shrink(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).shrink(ptr, old_layout, new_layout) }
+    }
+}
diff --git a/alloc/src/rc.rs b/alloc/src/rc.rs
index a24ea6e526c4b..d25d7044e0001 100644
--- a/alloc/src/rc.rs
+++ b/alloc/src/rc.rs
@@ -4413,3 +4413,55 @@ impl<T: ?Sized, A: Allocator> Drop for UniqueRcUninit<T, A> {
         }
     }
 }
+
+#[unstable(feature = "allocator_api", issue = "32838")]
+unsafe impl<T: Allocator + ?Sized, A: Allocator> Allocator for Rc<T, A> {
+    #[inline]
+    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        (**self).allocate(layout)
+    }
+
+    #[inline]
+    fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        (**self).allocate_zeroed(layout)
+    }
+
+    #[inline]
+    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).deallocate(ptr, layout) }
+    }
+
+    #[inline]
+    unsafe fn grow(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).grow(ptr, old_layout, new_layout) }
+    }
+
+    #[inline]
+    unsafe fn grow_zeroed(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).grow_zeroed(ptr, old_layout, new_layout) }
+    }
+
+    #[inline]
+    unsafe fn shrink(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).shrink(ptr, old_layout, new_layout) }
+    }
+}
diff --git a/alloc/src/sync.rs b/alloc/src/sync.rs
index 13b5cf23e72d8..6618d3f8ad980 100644
--- a/alloc/src/sync.rs
+++ b/alloc/src/sync.rs
@@ -4780,3 +4780,55 @@ unsafe impl<#[may_dangle] T: ?Sized, A: Allocator> Drop for UniqueArc<T, A> {
         unsafe { ptr::drop_in_place(&mut (*self.ptr.as_ptr()).data) };
     }
 }
+
+#[unstable(feature = "allocator_api", issue = "32838")]
+unsafe impl<T: Allocator + ?Sized, A: Allocator> Allocator for Arc<T, A> {
+    #[inline]
+    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        (**self).allocate(layout)
+    }
+
+    #[inline]
+    fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        (**self).allocate_zeroed(layout)
+    }
+
+    #[inline]
+    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).deallocate(ptr, layout) }
+    }
+
+    #[inline]
+    unsafe fn grow(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).grow(ptr, old_layout, new_layout) }
+    }
+
+    #[inline]
+    unsafe fn grow_zeroed(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).grow_zeroed(ptr, old_layout, new_layout) }
+    }
+
+    #[inline]
+    unsafe fn shrink(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: the safety contract must be upheld by the caller
+        unsafe { (**self).shrink(ptr, old_layout, new_layout) }
+    }
+}

From 3bf2d5646440e3f49557069e7f39bd7790311c42 Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Tue, 4 Nov 2025 15:22:50 +0100
Subject: [PATCH 274/358] use funnel shift as fallback impl for rotating shifts

---
 core/src/intrinsics/mod.rs  | 20 +++++++++++++++++---
 core/src/num/uint_macros.rs |  2 ++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/core/src/intrinsics/mod.rs b/core/src/intrinsics/mod.rs
index 5ba2d92a4596f..d9467b8a43ace 100644
--- a/core/src/intrinsics/mod.rs
+++ b/core/src/intrinsics/mod.rs
@@ -56,7 +56,7 @@
 
 use crate::ffi::va_list::{VaArgSafe, VaListImpl};
 use crate::marker::{ConstParamTy, Destruct, DiscriminantKind, PointeeSized, Tuple};
-use crate::ptr;
+use crate::{mem, ptr};
 
 mod bounds;
 pub mod fallback;
@@ -2017,7 +2017,14 @@ pub const unsafe fn unchecked_mul<T: Copy>(x: T, y: T) -> T;
 #[rustc_intrinsic_const_stable_indirect]
 #[rustc_nounwind]
 #[rustc_intrinsic]
-pub const fn rotate_left<T: Copy>(x: T, shift: u32) -> T;
+#[rustc_allow_const_fn_unstable(const_trait_impl, funnel_shifts)]
+#[miri::intrinsic_fallback_is_spec]
+pub const fn rotate_left<T: Copy>(x: T, shift: u32) -> T {
+    // Make sure to call the intrinsic for `funnel_shl`, not the fallback impl.
+    // SAFETY: we modulo `shift` so that the result is definitely less than the size of
+    // `T` in bits.
+    unsafe { unchecked_funnel_shl(x, x, shift % (mem::size_of::<T>() as u32 * 8)) }
+}
 
 /// Performs rotate right.
 ///
@@ -2032,7 +2039,14 @@
 #[rustc_intrinsic_const_stable_indirect]
 #[rustc_nounwind]
 #[rustc_intrinsic]
-pub const fn rotate_right<T: Copy>(x: T, shift: u32) -> T;
+#[rustc_allow_const_fn_unstable(const_trait_impl, funnel_shifts)]
+#[miri::intrinsic_fallback_is_spec]
+pub const fn rotate_right<T: Copy>(x: T, shift: u32) -> T {
+    // Make sure to call the intrinsic for `funnel_shr`, not the fallback impl.
+    // SAFETY: we modulo `shift` so that the result is definitely less than the size of
+    // `T` in bits.
+    unsafe { unchecked_funnel_shr(x, x, shift % (mem::size_of::<T>() as u32 * 8)) }
+}
 
 /// Returns (a + b) mod 2<sup>N</sup>, where N is the width of T in bits.
/// diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index 1efc551d670ab..8cdc7e925b68a 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -351,6 +351,7 @@ macro_rules! uint_impl { #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline(always)] + #[rustc_allow_const_fn_unstable(const_trait_impl)] // for the intrinsic fallback pub const fn rotate_left(self, n: u32) -> Self { return intrinsics::rotate_left(self, n); } @@ -374,6 +375,7 @@ macro_rules! uint_impl { #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline(always)] + #[rustc_allow_const_fn_unstable(const_trait_impl)] // for the intrinsic fallback pub const fn rotate_right(self, n: u32) -> Self { return intrinsics::rotate_right(self, n); } From f4c4b0d02886dde4a0d3aba0bd9e4529e182f556 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 4 Nov 2025 15:33:11 +0100 Subject: [PATCH 275/358] document behavior of rotations for n >= BITS --- core/src/num/int_macros.rs | 10 ++++++++++ core/src/num/uint_macros.rs | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index 16f85c71403ab..d5e60f7cc81b0 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -275,6 +275,10 @@ macro_rules! int_impl { /// Shifts the bits to the left by a specified amount, `n`, /// wrapping the truncated bits to the end of the resulting integer. /// + /// `rotate_left(n)` is equivalent to applying `rotate_left(1)` a total of `n` times. In + /// particular, a rotation by the number of bits in `self` returns the input value + /// unchanged. + /// /// Please note this isn't the same operation as the `<<` shifting operator! /// /// # Examples @@ -284,6 +288,7 @@ macro_rules! int_impl { #[doc = concat!("let m = ", $rot_result, ";")] /// #[doc = concat!("assert_eq!(n.rotate_left(", $rot, "), m);")] + #[doc = concat!("assert_eq!(n.rotate_left(1024), n);")] /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_stable(feature = "const_int_methods", since = "1.32.0")] @@ -298,6 +303,10 @@ macro_rules! int_impl { /// wrapping the truncated bits to the beginning of the resulting /// integer. /// + /// `rotate_right(n)` is equivalent to applying `rotate_right(1)` a total of `n` times. In + /// particular, a rotation by the number of bits in `self` returns the input value + /// unchanged. + /// /// Please note this isn't the same operation as the `>>` shifting operator! /// /// # Examples @@ -307,6 +316,7 @@ macro_rules! int_impl { #[doc = concat!("let m = ", $rot_op, ";")] /// #[doc = concat!("assert_eq!(n.rotate_right(", $rot, "), m);")] + #[doc = concat!("assert_eq!(n.rotate_right(1024), n);")] /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_stable(feature = "const_int_methods", since = "1.32.0")] diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index 8cdc7e925b68a..02b94a092b91c 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -336,6 +336,10 @@ macro_rules! uint_impl { /// Shifts the bits to the left by a specified amount, `n`, /// wrapping the truncated bits to the end of the resulting integer. /// + /// `rotate_left(n)` is equivalent to applying `rotate_left(1)` a total of `n` times. In + /// particular, a rotation by the number of bits in `self` returns the input value + /// unchanged. + /// /// Please note this isn't the same operation as the `<<` shifting operator! 
/// /// # Examples @@ -345,6 +349,7 @@ macro_rules! uint_impl { #[doc = concat!("let m = ", $rot_result, ";")] /// #[doc = concat!("assert_eq!(n.rotate_left(", $rot, "), m);")] + #[doc = concat!("assert_eq!(n.rotate_left(1024), n);")] /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_stable(feature = "const_math", since = "1.32.0")] @@ -360,6 +365,10 @@ macro_rules! uint_impl { /// wrapping the truncated bits to the beginning of the resulting /// integer. /// + /// `rotate_right(n)` is equivalent to applying `rotate_right(1)` a total of `n` times. In + /// particular, a rotation by the number of bits in `self` returns the input value + /// unchanged. + /// /// Please note this isn't the same operation as the `>>` shifting operator! /// /// # Examples @@ -369,6 +378,7 @@ macro_rules! uint_impl { #[doc = concat!("let m = ", $rot_op, ";")] /// #[doc = concat!("assert_eq!(n.rotate_right(", $rot, "), m);")] + #[doc = concat!("assert_eq!(n.rotate_right(1024), n);")] /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_stable(feature = "const_math", since = "1.32.0")] From 8e6268a788b5aa4a460568756908077729229ccd Mon Sep 17 00:00:00 2001 From: Shun Sakai Date: Wed, 5 Nov 2025 19:19:36 +0900 Subject: [PATCH 276/358] style: Update doctests for `highest_one` and `lowest_one` Use binary literals instead of hex literals. --- core/src/num/int_macros.rs | 16 ++++++++-------- core/src/num/nonzero.rs | 12 ++++++------ core/src/num/uint_macros.rs | 16 ++++++++-------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index 16f85c71403ab..7d395eb780346 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -217,10 +217,10 @@ macro_rules! int_impl { /// ``` /// #![feature(int_lowest_highest_one)] /// - #[doc = concat!("assert_eq!(0x0_", stringify!($SelfT), ".highest_one(), None);")] - #[doc = concat!("assert_eq!(0x1_", stringify!($SelfT), ".highest_one(), Some(0));")] - #[doc = concat!("assert_eq!(0x10_", stringify!($SelfT), ".highest_one(), Some(4));")] - #[doc = concat!("assert_eq!(0x1f_", stringify!($SelfT), ".highest_one(), Some(4));")] + #[doc = concat!("assert_eq!(0b0_", stringify!($SelfT), ".highest_one(), None);")] + #[doc = concat!("assert_eq!(0b1_", stringify!($SelfT), ".highest_one(), Some(0));")] + #[doc = concat!("assert_eq!(0b1_0000_", stringify!($SelfT), ".highest_one(), Some(4));")] + #[doc = concat!("assert_eq!(0b1_1111_", stringify!($SelfT), ".highest_one(), Some(4));")] /// ``` #[unstable(feature = "int_lowest_highest_one", issue = "145203")] #[must_use = "this returns the result of the operation, \ @@ -238,10 +238,10 @@ macro_rules! 
int_impl { /// ``` /// #![feature(int_lowest_highest_one)] /// - #[doc = concat!("assert_eq!(0x0_", stringify!($SelfT), ".lowest_one(), None);")] - #[doc = concat!("assert_eq!(0x1_", stringify!($SelfT), ".lowest_one(), Some(0));")] - #[doc = concat!("assert_eq!(0x10_", stringify!($SelfT), ".lowest_one(), Some(4));")] - #[doc = concat!("assert_eq!(0x1f_", stringify!($SelfT), ".lowest_one(), Some(0));")] + #[doc = concat!("assert_eq!(0b0_", stringify!($SelfT), ".lowest_one(), None);")] + #[doc = concat!("assert_eq!(0b1_", stringify!($SelfT), ".lowest_one(), Some(0));")] + #[doc = concat!("assert_eq!(0b1_0000_", stringify!($SelfT), ".lowest_one(), Some(4));")] + #[doc = concat!("assert_eq!(0b1_1111_", stringify!($SelfT), ".lowest_one(), Some(0));")] /// ``` #[unstable(feature = "int_lowest_highest_one", issue = "145203")] #[must_use = "this returns the result of the operation, \ diff --git a/core/src/num/nonzero.rs b/core/src/num/nonzero.rs index efb0665b7f461..92bca0eebfd93 100644 --- a/core/src/num/nonzero.rs +++ b/core/src/num/nonzero.rs @@ -708,9 +708,9 @@ macro_rules! nonzero_integer { /// # use core::num::NonZero; /// # fn main() { test().unwrap(); } /// # fn test() -> Option<()> { - #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0x1)?.highest_one(), 0);")] - #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0x10)?.highest_one(), 4);")] - #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0x1f)?.highest_one(), 4);")] + #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0b1)?.highest_one(), 0);")] + #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0b1_0000)?.highest_one(), 4);")] + #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0b1_1111)?.highest_one(), 4);")] /// # Some(()) /// # } /// ``` @@ -732,9 +732,9 @@ macro_rules! nonzero_integer { /// # use core::num::NonZero; /// # fn main() { test().unwrap(); } /// # fn test() -> Option<()> { - #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0x1)?.lowest_one(), 0);")] - #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0x10)?.lowest_one(), 4);")] - #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0x1f)?.lowest_one(), 0);")] + #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0b1)?.lowest_one(), 0);")] + #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0b1_0000)?.lowest_one(), 4);")] + #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0b1_1111)?.lowest_one(), 0);")] /// # Some(()) /// # } /// ``` diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index 1efc551d670ab..2996e7b00da4e 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -272,10 +272,10 @@ macro_rules! 
uint_impl { /// ``` /// #![feature(int_lowest_highest_one)] /// - #[doc = concat!("assert_eq!(0x0_", stringify!($SelfT), ".highest_one(), None);")] - #[doc = concat!("assert_eq!(0x1_", stringify!($SelfT), ".highest_one(), Some(0));")] - #[doc = concat!("assert_eq!(0x10_", stringify!($SelfT), ".highest_one(), Some(4));")] - #[doc = concat!("assert_eq!(0x1f_", stringify!($SelfT), ".highest_one(), Some(4));")] + #[doc = concat!("assert_eq!(0b0_", stringify!($SelfT), ".highest_one(), None);")] + #[doc = concat!("assert_eq!(0b1_", stringify!($SelfT), ".highest_one(), Some(0));")] + #[doc = concat!("assert_eq!(0b1_0000_", stringify!($SelfT), ".highest_one(), Some(4));")] + #[doc = concat!("assert_eq!(0b1_1111_", stringify!($SelfT), ".highest_one(), Some(4));")] /// ``` #[unstable(feature = "int_lowest_highest_one", issue = "145203")] #[must_use = "this returns the result of the operation, \ @@ -296,10 +296,10 @@ macro_rules! uint_impl { /// ``` /// #![feature(int_lowest_highest_one)] /// - #[doc = concat!("assert_eq!(0x0_", stringify!($SelfT), ".lowest_one(), None);")] - #[doc = concat!("assert_eq!(0x1_", stringify!($SelfT), ".lowest_one(), Some(0));")] - #[doc = concat!("assert_eq!(0x10_", stringify!($SelfT), ".lowest_one(), Some(4));")] - #[doc = concat!("assert_eq!(0x1f_", stringify!($SelfT), ".lowest_one(), Some(0));")] + #[doc = concat!("assert_eq!(0b0_", stringify!($SelfT), ".lowest_one(), None);")] + #[doc = concat!("assert_eq!(0b1_", stringify!($SelfT), ".lowest_one(), Some(0));")] + #[doc = concat!("assert_eq!(0b1_0000_", stringify!($SelfT), ".lowest_one(), Some(4));")] + #[doc = concat!("assert_eq!(0b1_1111_", stringify!($SelfT), ".lowest_one(), Some(0));")] /// ``` #[unstable(feature = "int_lowest_highest_one", issue = "145203")] #[must_use = "this returns the result of the operation, \ From 55843ab72fbfda2499de4d6d21576e56d38b29da Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 12:08:27 +0200 Subject: [PATCH 277/358] extract s390x `vector` and friends to their own rust feature --- core/src/lib.rs | 1 + stdarch/crates/core_arch/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/lib.rs b/core/src/lib.rs index f1948fc778ce2..1c0a5631665eb 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -204,6 +204,7 @@ #![feature(riscv_target_feature)] #![feature(rtm_target_feature)] #![feature(s390x_target_feature)] +#![feature(s390x_target_feature_vector)] #![feature(wasm_target_feature)] #![feature(x86_amx_intrinsics)] // tidy-alphabetical-end diff --git a/stdarch/crates/core_arch/src/lib.rs b/stdarch/crates/core_arch/src/lib.rs index 26a9cb5899183..7aa71a49837ef 100644 --- a/stdarch/crates/core_arch/src/lib.rs +++ b/stdarch/crates/core_arch/src/lib.rs @@ -22,7 +22,7 @@ arm_target_feature, mips_target_feature, powerpc_target_feature, - s390x_target_feature, + s390x_target_feature_vector, loongarch_target_feature, wasm_target_feature, abi_unadjusted, From e328224705a86877fe7cc38ba989844ba88e2527 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 12:17:19 +0200 Subject: [PATCH 278/358] `std_detect`: remove unneeded stability lines from s390x features macro --- std_detect/src/detect/arch/s390x.rs | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/std_detect/src/detect/arch/s390x.rs b/std_detect/src/detect/arch/s390x.rs index d59fbc7de3bd6..4c81f55aa8230 100644 --- a/std_detect/src/detect/arch/s390x.rs +++ b/std_detect/src/detect/arch/s390x.rs @@ -12,73 +12,50 @@ features! 
{ #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] concurrent_functions: "concurrent-functions"; /// s390x concurrent-functions facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] deflate_conversion: "deflate-conversion"; /// s390x deflate-conversion facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] enhanced_sort: "enhanced-sort"; /// s390x enhanced-sort facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] guarded_storage: "guarded-storage"; /// s390x guarded-storage facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] high_word: "high-word"; /// s390x high-word facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension3: "message-security-assist-extension3"; /// s390x message-security-assist-extension3 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension4: "message-security-assist-extension4"; /// s390x message-security-assist-extension4 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension5: "message-security-assist-extension5"; /// s390x message-security-assist-extension5 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension8: "message-security-assist-extension8"; /// s390x message-security-assist-extension8 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension9: "message-security-assist-extension9"; /// s390x message-security-assist-extension9 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension12: "message-security-assist-extension12"; /// s390x message-security-assist-extension12 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_2: "miscellaneous-extensions-2"; /// s390x miscellaneous-extensions-2 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_3: "miscellaneous-extensions-3"; /// s390x miscellaneous-extensions-3 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = 
"stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_4: "miscellaneous-extensions-4"; /// s390x miscellaneous-extensions-4 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] nnp_assist: "nnp-assist"; /// s390x nnp-assist facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] transactional_execution: "transactional-execution"; /// s390x transactional-execution facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector: "vector"; /// s390x vector facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_1: "vector-enhancements-1"; /// s390x vector-enhancements-1 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_2: "vector-enhancements-2"; /// s390x vector-enhancements-2 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_3: "vector-enhancements-3"; /// s390x vector-enhancements-3 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal: "vector-packed-decimal"; /// s390x vector-packed-decimal facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement: "vector-packed-decimal-enhancement"; /// s390x vector-packed-decimal-enhancement facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement_2: "vector-packed-decimal-enhancement-2"; /// s390x vector-packed-decimal-enhancement-2 facility - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement_3: "vector-packed-decimal-enhancement-3"; /// s390x vector-packed-decimal-enhancement-3 facility } From ae6df2a66f49a4543c70588def6cdea97eeef543 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 12:23:22 +0200 Subject: [PATCH 279/358] `std_detect`: give s390x features more accurate features / tracking issues --- std_detect/src/detect/arch/s390x.rs | 48 ++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/std_detect/src/detect/arch/s390x.rs b/std_detect/src/detect/arch/s390x.rs index 4c81f55aa8230..da3c8fce036ca 100644 --- a/std_detect/src/detect/arch/s390x.rs +++ b/std_detect/src/detect/arch/s390x.rs @@ -10,52 +10,52 @@ features! { /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) /// the macro expands to `true`. 
#[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] concurrent_functions: "concurrent-functions"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] concurrent_functions: "concurrent-functions"; /// s390x concurrent-functions facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] deflate_conversion: "deflate-conversion"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] deflate_conversion: "deflate-conversion"; /// s390x deflate-conversion facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] enhanced_sort: "enhanced-sort"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] enhanced_sort: "enhanced-sort"; /// s390x enhanced-sort facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] guarded_storage: "guarded-storage"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] guarded_storage: "guarded-storage"; /// s390x guarded-storage facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] high_word: "high-word"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] high_word: "high-word"; /// s390x high-word facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension3: "message-security-assist-extension3"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] message_security_assist_extension3: "message-security-assist-extension3"; /// s390x message-security-assist-extension3 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension4: "message-security-assist-extension4"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] message_security_assist_extension4: "message-security-assist-extension4"; /// s390x message-security-assist-extension4 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension5: "message-security-assist-extension5"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] message_security_assist_extension5: "message-security-assist-extension5"; /// s390x message-security-assist-extension5 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension8: "message-security-assist-extension8"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] message_security_assist_extension8: "message-security-assist-extension8"; /// s390x message-security-assist-extension8 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension9: "message-security-assist-extension9"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] message_security_assist_extension9: "message-security-assist-extension9"; /// s390x message-security-assist-extension9 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension12: "message-security-assist-extension12"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] message_security_assist_extension12: "message-security-assist-extension12"; 
/// s390x message-security-assist-extension12 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_2: "miscellaneous-extensions-2"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] miscellaneous_extensions_2: "miscellaneous-extensions-2"; /// s390x miscellaneous-extensions-2 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_3: "miscellaneous-extensions-3"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] miscellaneous_extensions_3: "miscellaneous-extensions-3"; /// s390x miscellaneous-extensions-3 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_4: "miscellaneous-extensions-4"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] miscellaneous_extensions_4: "miscellaneous-extensions-4"; /// s390x miscellaneous-extensions-4 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] nnp_assist: "nnp-assist"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] nnp_assist: "nnp-assist"; /// s390x nnp-assist facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] transactional_execution: "transactional-execution"; + @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] transactional_execution: "transactional-execution"; /// s390x transactional-execution facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector: "vector"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector: "vector"; /// s390x vector facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_1: "vector-enhancements-1"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_enhancements_1: "vector-enhancements-1"; /// s390x vector-enhancements-1 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_2: "vector-enhancements-2"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_enhancements_2: "vector-enhancements-2"; /// s390x vector-enhancements-2 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_3: "vector-enhancements-3"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_enhancements_3: "vector-enhancements-3"; /// s390x vector-enhancements-3 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal: "vector-packed-decimal"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_packed_decimal: "vector-packed-decimal"; /// s390x vector-packed-decimal facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement: "vector-packed-decimal-enhancement"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_packed_decimal_enhancement: "vector-packed-decimal-enhancement"; /// s390x vector-packed-decimal-enhancement facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] 
vector_packed_decimal_enhancement_2: "vector-packed-decimal-enhancement-2"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_packed_decimal_enhancement_2: "vector-packed-decimal-enhancement-2"; /// s390x vector-packed-decimal-enhancement-2 facility - @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement_3: "vector-packed-decimal-enhancement-3"; + @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_packed_decimal_enhancement_3: "vector-packed-decimal-enhancement-3"; /// s390x vector-packed-decimal-enhancement-3 facility } From 05b876c55a35f1e2579d487157db69ea492ade3e Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 12:41:19 +0200 Subject: [PATCH 280/358] stabilize `stdarch_s390x_feature_detection` --- std/src/lib.rs | 2 +- std/tests/run-time-detect.rs | 4 ---- std_detect/src/detect/arch/mod.rs | 2 +- std_detect/src/detect/arch/s390x.rs | 2 +- std_detect/tests/cpu-detection.rs | 1 - std_detect/tests/macro_trailing_commas.rs | 1 - stdarch/crates/core_arch/src/lib.rs | 6 +----- 7 files changed, 4 insertions(+), 14 deletions(-) diff --git a/std/src/lib.rs b/std/src/lib.rs index a8c50cec01e0b..7b6cfbfe0f259 100644 --- a/std/src/lib.rs +++ b/std/src/lib.rs @@ -672,7 +672,7 @@ pub mod arch { pub use std_detect::is_loongarch_feature_detected; #[unstable(feature = "is_riscv_feature_detected", issue = "111192")] pub use std_detect::is_riscv_feature_detected; - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + #[stable(feature = "stdarch_s390x_feature_detection", since = "CURRENT_RUSTC_VERSION")] pub use std_detect::is_s390x_feature_detected; #[stable(feature = "simd_x86", since = "1.27.0")] pub use std_detect::is_x86_feature_detected; diff --git a/std/tests/run-time-detect.rs b/std/tests/run-time-detect.rs index ae0c3385d2ad9..b2c3d0d3f9f8b 100644 --- a/std/tests/run-time-detect.rs +++ b/std/tests/run-time-detect.rs @@ -8,10 +8,6 @@ all(target_arch = "aarch64", any(target_os = "linux", target_os = "android")), feature(stdarch_aarch64_feature_detection) )] -#![cfg_attr( - all(target_arch = "s390x", target_os = "linux"), - feature(stdarch_s390x_feature_detection) -)] #![cfg_attr( all(target_arch = "powerpc", target_os = "linux"), feature(stdarch_powerpc_feature_detection) diff --git a/std_detect/src/detect/arch/mod.rs b/std_detect/src/detect/arch/mod.rs index c066b9cc68155..23e7a30b985bf 100644 --- a/std_detect/src/detect/arch/mod.rs +++ b/std_detect/src/detect/arch/mod.rs @@ -60,7 +60,7 @@ cfg_select! { pub use loongarch::*; } target_arch = "s390x" => { - #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + #[stable(feature = "stdarch_s390x_feature_detection", since = "CURRENT_RUSTC_VERSION")] pub use s390x::*; } _ => { diff --git a/std_detect/src/detect/arch/s390x.rs b/std_detect/src/detect/arch/s390x.rs index da3c8fce036ca..44961abcf3a0a 100644 --- a/std_detect/src/detect/arch/s390x.rs +++ b/std_detect/src/detect/arch/s390x.rs @@ -9,7 +9,7 @@ features! { /// /// When the feature is known to be enabled at compile time (e.g. via `-Ctarget-feature`) /// the macro expands to `true`. 
- #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + #[stable(feature = "stdarch_s390x_feature_detection", since = "CURRENT_RUSTC_VERSION")] @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] concurrent_functions: "concurrent-functions"; /// s390x concurrent-functions facility @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] deflate_conversion: "deflate-conversion"; diff --git a/std_detect/tests/cpu-detection.rs b/std_detect/tests/cpu-detection.rs index 0c4fa57f2b465..3e36abda97dfa 100644 --- a/std_detect/tests/cpu-detection.rs +++ b/std_detect/tests/cpu-detection.rs @@ -11,7 +11,6 @@ )] #![cfg_attr(target_arch = "powerpc", feature(stdarch_powerpc_feature_detection))] #![cfg_attr(target_arch = "powerpc64", feature(stdarch_powerpc_feature_detection))] -#![cfg_attr(target_arch = "s390x", feature(stdarch_s390x_feature_detection))] #![allow(clippy::unwrap_used, clippy::use_debug, clippy::print_stdout)] #[cfg_attr( diff --git a/std_detect/tests/macro_trailing_commas.rs b/std_detect/tests/macro_trailing_commas.rs index 6072ddf5ac45e..29bd3f1162a42 100644 --- a/std_detect/tests/macro_trailing_commas.rs +++ b/std_detect/tests/macro_trailing_commas.rs @@ -25,7 +25,6 @@ any(target_arch = "powerpc", target_arch = "powerpc64"), feature(stdarch_powerpc_feature_detection) )] -#![cfg_attr(target_arch = "s390x", feature(stdarch_s390x_feature_detection))] #![cfg_attr( any(target_arch = "riscv32", target_arch = "riscv64"), feature(stdarch_riscv_feature_detection) diff --git a/stdarch/crates/core_arch/src/lib.rs b/stdarch/crates/core_arch/src/lib.rs index 7aa71a49837ef..ed9138f343a18 100644 --- a/stdarch/crates/core_arch/src/lib.rs +++ b/stdarch/crates/core_arch/src/lib.rs @@ -64,11 +64,7 @@ )] #![cfg_attr( test, - feature( - stdarch_arm_feature_detection, - stdarch_powerpc_feature_detection, - stdarch_s390x_feature_detection - ) + feature(stdarch_arm_feature_detection, stdarch_powerpc_feature_detection,) )] #[cfg(test)] From a56f595eaf4d20bda6b0725fca9b6c63cee1fd9d Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 13:04:58 +0200 Subject: [PATCH 281/358] stabilize `s390x_target_feature_vector` --- core/src/lib.rs | 1 - std_detect/src/detect/arch/s390x.rs | 24 ++++++++++++------------ stdarch/crates/core_arch/src/lib.rs | 1 - 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/core/src/lib.rs b/core/src/lib.rs index 1c0a5631665eb..f1948fc778ce2 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -204,7 +204,6 @@ #![feature(riscv_target_feature)] #![feature(rtm_target_feature)] #![feature(s390x_target_feature)] -#![feature(s390x_target_feature_vector)] #![feature(wasm_target_feature)] #![feature(x86_amx_intrinsics)] // tidy-alphabetical-end diff --git a/std_detect/src/detect/arch/s390x.rs b/std_detect/src/detect/arch/s390x.rs index 44961abcf3a0a..6122e8f5b8377 100644 --- a/std_detect/src/detect/arch/s390x.rs +++ b/std_detect/src/detect/arch/s390x.rs @@ -32,30 +32,30 @@ features! 
{ /// s390x message-security-assist-extension9 facility @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] message_security_assist_extension12: "message-security-assist-extension12"; /// s390x message-security-assist-extension12 facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] miscellaneous_extensions_2: "miscellaneous-extensions-2"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] miscellaneous_extensions_2: "miscellaneous-extensions-2"; /// s390x miscellaneous-extensions-2 facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] miscellaneous_extensions_3: "miscellaneous-extensions-3"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] miscellaneous_extensions_3: "miscellaneous-extensions-3"; /// s390x miscellaneous-extensions-3 facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] miscellaneous_extensions_4: "miscellaneous-extensions-4"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] miscellaneous_extensions_4: "miscellaneous-extensions-4"; /// s390x miscellaneous-extensions-4 facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] nnp_assist: "nnp-assist"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] nnp_assist: "nnp-assist"; /// s390x nnp-assist facility @FEATURE: #[unstable(feature = "s390x_target_feature", issue = "44839")] transactional_execution: "transactional-execution"; /// s390x transactional-execution facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector: "vector"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] vector: "vector"; /// s390x vector facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_enhancements_1: "vector-enhancements-1"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] vector_enhancements_1: "vector-enhancements-1"; /// s390x vector-enhancements-1 facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_enhancements_2: "vector-enhancements-2"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] vector_enhancements_2: "vector-enhancements-2"; /// s390x vector-enhancements-2 facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_enhancements_3: "vector-enhancements-3"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] vector_enhancements_3: "vector-enhancements-3"; /// s390x vector-enhancements-3 facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_packed_decimal: "vector-packed-decimal"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] vector_packed_decimal: "vector-packed-decimal"; /// s390x vector-packed-decimal facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_packed_decimal_enhancement: "vector-packed-decimal-enhancement"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] vector_packed_decimal_enhancement: "vector-packed-decimal-enhancement"; /// s390x 
vector-packed-decimal-enhancement facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_packed_decimal_enhancement_2: "vector-packed-decimal-enhancement-2"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] vector_packed_decimal_enhancement_2: "vector-packed-decimal-enhancement-2"; /// s390x vector-packed-decimal-enhancement-2 facility - @FEATURE: #[unstable(feature = "s390x_target_feature_vector", issue = "145649")] vector_packed_decimal_enhancement_3: "vector-packed-decimal-enhancement-3"; + @FEATURE: #[stable(feature = "s390x_target_feature_vector", since = "CURRENT_RUSTC_VERSION")] vector_packed_decimal_enhancement_3: "vector-packed-decimal-enhancement-3"; /// s390x vector-packed-decimal-enhancement-3 facility } diff --git a/stdarch/crates/core_arch/src/lib.rs b/stdarch/crates/core_arch/src/lib.rs index ed9138f343a18..06cbd32d67729 100644 --- a/stdarch/crates/core_arch/src/lib.rs +++ b/stdarch/crates/core_arch/src/lib.rs @@ -22,7 +22,6 @@ arm_target_feature, mips_target_feature, powerpc_target_feature, - s390x_target_feature_vector, loongarch_target_feature, wasm_target_feature, abi_unadjusted, From 074f18aa0f4690a3f6a5fbc23739d434c789193e Mon Sep 17 00:00:00 2001 From: Waffle Lapkin Date: Thu, 6 Nov 2025 15:22:30 +0100 Subject: [PATCH 282/358] core docs: add notes about availability of `Atomic*::from_mut_slice` --- core/src/sync/atomic.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/core/src/sync/atomic.rs b/core/src/sync/atomic.rs index 30a42d4eb5e64..a3ceac89ef128 100644 --- a/core/src/sync/atomic.rs +++ b/core/src/sync/atomic.rs @@ -1558,6 +1558,8 @@ impl AtomicPtr { /// Gets atomic access to a pointer. /// + /// **Note:** This function is only available on targets where `AtomicPtr` has the same alignment as `*const T` + /// /// # Examples /// /// ``` @@ -1625,6 +1627,8 @@ impl AtomicPtr { /// Gets atomic access to a slice of pointers. /// + /// **Note:** This function is only available on targets where `AtomicPtr` has the same alignment as `*const T` + /// /// # Examples /// /// ```ignore-wasm @@ -2804,6 +2808,14 @@ macro_rules! atomic_int { #[doc = concat!("Get atomic access to a `&mut [", stringify!($int_type), "]` slice.")] /// + #[doc = if_8_bit! { + $int_type, + no = [ + "**Note:** This function is only available on targets where `", + stringify!($atomic_type), "` has the same alignment as `", stringify!($int_type), "`." 
+ ], + }] + /// /// # Examples /// /// ```ignore-wasm From b0a5ab651dea529f9321a7b5c6882c96942d7458 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 6 Nov 2025 18:07:49 +0100 Subject: [PATCH 283/358] stabilize duration_from_nanos_u128 --- core/src/time.rs | 7 +++---- coretests/tests/lib.rs | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/core/src/time.rs b/core/src/time.rs index f721fcd6156cf..51a01545f5cf5 100644 --- a/core/src/time.rs +++ b/core/src/time.rs @@ -317,7 +317,6 @@ impl Duration { /// # Examples /// /// ``` - /// #![feature(duration_from_nanos_u128)] /// use std::time::Duration; /// /// let nanos = 10_u128.pow(24) + 321; @@ -326,12 +325,12 @@ impl Duration { /// assert_eq!(10_u64.pow(15), duration.as_secs()); /// assert_eq!(321, duration.subsec_nanos()); /// ``` - #[unstable(feature = "duration_from_nanos_u128", issue = "139201")] - // This is necessary because of const `try_from`, but can be removed if a trait-free impl is used instead - #[rustc_const_unstable(feature = "duration_from_nanos_u128", issue = "139201")] + #[stable(feature = "duration_from_nanos_u128", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "duration_from_nanos_u128", since = "CURRENT_RUSTC_VERSION")] #[must_use] #[inline] #[track_caller] + #[rustc_allow_const_fn_unstable(const_trait_impl, const_convert)] // for `u64::try_from` pub const fn from_nanos_u128(nanos: u128) -> Duration { const NANOS_PER_SEC: u128 = self::NANOS_PER_SEC as u128; let Ok(secs) = u64::try_from(nanos / NANOS_PER_SEC) else { diff --git a/coretests/tests/lib.rs b/coretests/tests/lib.rs index 80b62038c40ec..e190536abcf9f 100644 --- a/coretests/tests/lib.rs +++ b/coretests/tests/lib.rs @@ -42,7 +42,6 @@ #![feature(drop_guard)] #![feature(duration_constants)] #![feature(duration_constructors)] -#![feature(duration_from_nanos_u128)] #![feature(error_generic_member_access)] #![feature(exact_div)] #![feature(exact_size_is_empty)] From 075625f6099cf1877f3ff8d05697761f2f874a81 Mon Sep 17 00:00:00 2001 From: Dmitry Marakasov Date: Fri, 7 Nov 2025 00:58:04 +0300 Subject: [PATCH 284/358] Sync str::rsplit_once example with str::split_once --- core/src/str/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/str/mod.rs b/core/src/str/mod.rs index 37dc401ed0098..45308c4b3e9c5 100644 --- a/core/src/str/mod.rs +++ b/core/src/str/mod.rs @@ -1953,6 +1953,7 @@ impl str { /// /// ``` /// assert_eq!("cfg".rsplit_once('='), None); + /// assert_eq!("cfg=".rsplit_once('='), Some(("cfg", ""))); /// assert_eq!("cfg=foo".rsplit_once('='), Some(("cfg", "foo"))); /// assert_eq!("cfg=foo=bar".rsplit_once('='), Some(("cfg=foo", "bar"))); /// ``` From a33fc7214898075b85d5ea147513a535656ca9c2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 12 Oct 2025 22:41:03 -0700 Subject: [PATCH 285/358] Add -Zannotate-moves for profiler visibility of move/copy operations This implements a new unstable compiler flag `-Zannotate-moves` that makes move and copy operations visible in profilers by creating synthetic debug information. This is achieved with zero runtime cost by manipulating debug info scopes to make moves/copies appear as calls to `compiler_move` and `compiler_copy` marker functions in profiling tools. This allows developers to identify expensive move/copy operations in their code using standard profiling tools, without requiring specialized tooling or runtime instrumentation. The implementation works at codegen time. 
When processing MIR operands (`Operand::Move` and `Operand::Copy`), the
codegen creates an `OperandRef` with an optional `move_annotation` field
containing an `Instance` of the appropriate profiling marker function.
When storing the operand, `store_with_annotation()` wraps the store
operation in a synthetic debug scope that makes it appear inlined from
the marker.

Two marker functions (`compiler_move` and `compiler_copy`) are defined
in `library/core/src/profiling.rs`. These are never actually called -
they exist solely as debug info anchors.

Operations are only annotated if the type:
- Meets the size threshold (default: 65 bytes, configurable via
  `-Zannotate-moves=SIZE`)
- Has a non-scalar backend representation (scalars use registers, not
  memcpy)

This has a very small impact on object file size. With the default
limit it's well under 0.1%, and even with a very small limit of 8 bytes
it's still ~1.5%. This could be enabled by default.
---
 core/src/lib.rs       |  2 ++
 core/src/profiling.rs | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 core/src/profiling.rs

diff --git a/core/src/lib.rs b/core/src/lib.rs
index f1948fc778ce2..121d26c8a3e3f 100644
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -282,6 +282,8 @@ pub mod num;
 pub mod hint;
 pub mod intrinsics;
 pub mod mem;
+#[unstable(feature = "profiling_marker_api", issue = "148197")]
+pub mod profiling;
 pub mod ptr;
 #[unstable(feature = "ub_checks", issue = "none")]
 pub mod ub_checks;
diff --git a/core/src/profiling.rs b/core/src/profiling.rs
new file mode 100644
index 0000000000000..db4a62480a3a1
--- /dev/null
+++ b/core/src/profiling.rs
@@ -0,0 +1,33 @@
+//! Profiling markers for compiler instrumentation.
+
+/// Profiling marker for move operations.
+///
+/// This function is never called at runtime. When `-Z annotate-moves` is enabled,
+/// the compiler creates synthetic debug info that makes move operations appear as
+/// calls to this function in profilers.
+///
+/// The `SIZE` parameter encodes the size of the type being copied. It's the same as
+/// `size_of::<T>()`, and is only present for convenience.
+#[unstable(feature = "profiling_marker_api", issue = "148197")]
+#[lang = "compiler_move"]
+pub fn compiler_move<T, const SIZE: usize>(_src: *const T, _dst: *mut T) {
+    unreachable!(
+        "compiler_move marks where the compiler generated a memcpy for moves. It is never actually called."
+    )
+}
+
+/// Profiling marker for copy operations.
+///
+/// This function is never called at runtime. When `-Z annotate-moves` is enabled,
+/// the compiler creates synthetic debug info that makes copy operations appear as
+/// calls to this function in profilers.
+///
+/// The `SIZE` parameter encodes the size of the type being copied. It's the same as
+/// `size_of::<T>()`, and is only present for convenience.
+#[unstable(feature = "profiling_marker_api", issue = "148197")]
+#[lang = "compiler_copy"]
+pub fn compiler_copy<T, const SIZE: usize>(_src: *const T, _dst: *mut T) {
+    unreachable!(
+        "compiler_copy marks where the compiler generated a memcpy for copies. It is never actually called."
+    )
+}

From 26ca3defde91c272826246af7a5164512ed12c24 Mon Sep 17 00:00:00 2001
From: Amy Kwan
Date: Fri, 7 Nov 2025 04:36:12 +0000
Subject: [PATCH 286/358] Enable std locking functions on AIX

This patch enables the std locking functions on AIX by including AIX in
the list of supported targets for the locking functions.

Excluding AIX from the std locking functions results in compilation
errors such as: ("try_lock() not supported").
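
As a hedged sketch of what this enables (the path is purely illustrative),
the stable `File` locking APIs now work on AIX instead of returning the
"not supported" error at runtime:

```rust
use std::fs::File;
use std::io;

fn main() -> io::Result<()> {
    // On AIX this now goes through the platform locking path rather than
    // failing with "try_lock() not supported".
    let file = File::open("/tmp/example.lock")?; // hypothetical file
    match file.try_lock() {
        Ok(()) => {
            // Exclusive lock held; release it explicitly.
            file.unlock()?;
        }
        Err(err) => eprintln!("could not acquire lock: {err}"),
    }
    Ok(())
}
```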
--- std/src/sys/fs/unix.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/std/src/sys/fs/unix.rs b/std/src/sys/fs/unix.rs index 129fccdbf4197..cadcfddb0f7f8 100644 --- a/std/src/sys/fs/unix.rs +++ b/std/src/sys/fs/unix.rs @@ -1296,6 +1296,7 @@ impl File { target_os = "openbsd", target_os = "cygwin", target_os = "illumos", + target_os = "aix", target_vendor = "apple", ))] pub fn lock(&self) -> io::Result<()> { @@ -1321,6 +1322,7 @@ impl File { target_os = "cygwin", target_os = "solaris", target_os = "illumos", + target_os = "aix", target_vendor = "apple", )))] pub fn lock(&self) -> io::Result<()> { @@ -1335,6 +1337,7 @@ impl File { target_os = "openbsd", target_os = "cygwin", target_os = "illumos", + target_os = "aix", target_vendor = "apple", ))] pub fn lock_shared(&self) -> io::Result<()> { @@ -1360,6 +1363,7 @@ impl File { target_os = "cygwin", target_os = "solaris", target_os = "illumos", + target_os = "aix", target_vendor = "apple", )))] pub fn lock_shared(&self) -> io::Result<()> { @@ -1374,6 +1378,7 @@ impl File { target_os = "openbsd", target_os = "cygwin", target_os = "illumos", + target_os = "aix", target_vendor = "apple", ))] pub fn try_lock(&self) -> Result<(), TryLockError> { @@ -1415,6 +1420,7 @@ impl File { target_os = "cygwin", target_os = "solaris", target_os = "illumos", + target_os = "aix", target_vendor = "apple", )))] pub fn try_lock(&self) -> Result<(), TryLockError> { @@ -1432,6 +1438,7 @@ impl File { target_os = "openbsd", target_os = "cygwin", target_os = "illumos", + target_os = "aix", target_vendor = "apple", ))] pub fn try_lock_shared(&self) -> Result<(), TryLockError> { @@ -1473,6 +1480,7 @@ impl File { target_os = "cygwin", target_os = "solaris", target_os = "illumos", + target_os = "aix", target_vendor = "apple", )))] pub fn try_lock_shared(&self) -> Result<(), TryLockError> { @@ -1490,6 +1498,7 @@ impl File { target_os = "openbsd", target_os = "cygwin", target_os = "illumos", + target_os = "aix", target_vendor = "apple", ))] pub fn unlock(&self) -> io::Result<()> { @@ -1515,6 +1524,7 @@ impl File { target_os = "cygwin", target_os = "solaris", target_os = "illumos", + target_os = "aix", target_vendor = "apple", )))] pub fn unlock(&self) -> io::Result<()> { From a29dcce55098e8c101cd776a77f13a8da0a069c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20Bj=C3=B8rnager=20Jensen?= Date: Fri, 7 Nov 2025 15:10:04 +0100 Subject: [PATCH 287/358] Stabilise 'as_array' in '[_]' and '*const [_]'; Stabilise 'as_mut_array' in '[_]' and '*mut [_]'; Update feature gate and tracking issue for 'alloc_slice_into_array' items; --- alloc/src/boxed.rs | 2 +- alloc/src/rc.rs | 2 +- alloc/src/sync.rs | 2 +- core/src/lib.rs | 1 - core/src/ptr/const_ptr.rs | 3 ++- core/src/ptr/mut_ptr.rs | 3 ++- core/src/slice/mod.rs | 6 ++++-- 7 files changed, 11 insertions(+), 8 deletions(-) diff --git a/alloc/src/boxed.rs b/alloc/src/boxed.rs index 7ad1679b1c822..381eaeb809f27 100644 --- a/alloc/src/boxed.rs +++ b/alloc/src/boxed.rs @@ -850,7 +850,7 @@ impl Box<[T]> { /// This operation does not reallocate; the underlying array of the slice is simply reinterpreted as an array type. /// /// If `N` is not exactly equal to the length of `self`, then this method returns `None`. 
-    #[unstable(feature = "slice_as_array", issue = "133508")]
+    #[unstable(feature = "alloc_slice_into_array", issue = "148082")]
     #[inline]
     #[must_use]
     pub fn into_array<const N: usize>(self) -> Option<Box<[T; N], A>> {
diff --git a/alloc/src/rc.rs b/alloc/src/rc.rs
index a24ea6e526c4b..f0ce6aa03a8ba 100644
--- a/alloc/src/rc.rs
+++ b/alloc/src/rc.rs
@@ -1166,7 +1166,7 @@ impl<T, A: Allocator> Rc<[T], A> {
     /// This operation does not reallocate; the underlying array of the slice is simply reinterpreted as an array type.
     ///
     /// If `N` is not exactly equal to the length of `self`, then this method returns `None`.
-    #[unstable(feature = "slice_as_array", issue = "133508")]
+    #[unstable(feature = "alloc_slice_into_array", issue = "148082")]
     #[inline]
     #[must_use]
     pub fn into_array<const N: usize>(self) -> Option<Rc<[T; N], A>> {
diff --git a/alloc/src/sync.rs b/alloc/src/sync.rs
index 13b5cf23e72d8..b85293973fd5c 100644
--- a/alloc/src/sync.rs
+++ b/alloc/src/sync.rs
@@ -1314,7 +1314,7 @@ impl<T, A: Allocator> Arc<[T], A> {
     /// This operation does not reallocate; the underlying array of the slice is simply reinterpreted as an array type.
     ///
     /// If `N` is not exactly equal to the length of `self`, then this method returns `None`.
-    #[unstable(feature = "slice_as_array", issue = "133508")]
+    #[unstable(feature = "alloc_slice_into_array", issue = "148082")]
     #[inline]
     #[must_use]
     pub fn into_array<const N: usize>(self) -> Option<Arc<[T; N], A>> {
diff --git a/core/src/lib.rs b/core/src/lib.rs
index f1948fc778ce2..1f66c43c73dbe 100644
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -121,7 +121,6 @@
 #![feature(ptr_alignment_type)]
 #![feature(ptr_metadata)]
 #![feature(set_ptr_value)]
-#![feature(slice_as_array)]
 #![feature(slice_ptr_get)]
 #![feature(str_internals)]
 #![feature(str_split_inclusive_remainder)]
diff --git a/core/src/ptr/const_ptr.rs b/core/src/ptr/const_ptr.rs
index 451092709443b..84a6982d56805 100644
--- a/core/src/ptr/const_ptr.rs
+++ b/core/src/ptr/const_ptr.rs
@@ -1462,7 +1462,8 @@ impl<T> *const [T] {
     /// Gets a raw pointer to the underlying array.
     ///
     /// If `N` is not exactly equal to the length of `self`, then this method returns `None`.
-    #[unstable(feature = "slice_as_array", issue = "133508")]
+    #[stable(feature = "core_slice_as_array", since = "CURRENT_RUSTC_VERSION")]
+    #[rustc_const_stable(feature = "core_slice_as_array", since = "CURRENT_RUSTC_VERSION")]
     #[inline]
     #[must_use]
     pub const fn as_array<const N: usize>(self) -> Option<*const [T; N]> {
diff --git a/core/src/ptr/mut_ptr.rs b/core/src/ptr/mut_ptr.rs
index 24ee92bdd6e1b..85d54b4d3b9b3 100644
--- a/core/src/ptr/mut_ptr.rs
+++ b/core/src/ptr/mut_ptr.rs
@@ -1712,7 +1712,8 @@ impl<T> *mut [T] {
     /// Gets a raw, mutable pointer to the underlying array.
     ///
     /// If `N` is not exactly equal to the length of `self`, then this method returns `None`.
-    #[unstable(feature = "slice_as_array", issue = "133508")]
+    #[stable(feature = "core_slice_as_array", since = "CURRENT_RUSTC_VERSION")]
+    #[rustc_const_stable(feature = "core_slice_as_array", since = "CURRENT_RUSTC_VERSION")]
     #[inline]
     #[must_use]
     pub const fn as_mut_array<const N: usize>(self) -> Option<*mut [T; N]> {
diff --git a/core/src/slice/mod.rs b/core/src/slice/mod.rs
index 96c1034f9735f..1d88eb33dce10 100644
--- a/core/src/slice/mod.rs
+++ b/core/src/slice/mod.rs
@@ -841,7 +841,8 @@ impl<T> [T] {
     /// Gets a reference to the underlying array.
     ///
     /// If `N` is not exactly equal to the length of `self`, then this method returns `None`.
- #[unstable(feature = "slice_as_array", issue = "133508")] + #[stable(feature = "core_slice_as_array", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "core_slice_as_array", since = "CURRENT_RUSTC_VERSION")] #[inline] #[must_use] pub const fn as_array(&self) -> Option<&[T; N]> { @@ -859,7 +860,8 @@ impl [T] { /// Gets a mutable reference to the slice's underlying array. /// /// If `N` is not exactly equal to the length of `self`, then this method returns `None`. - #[unstable(feature = "slice_as_array", issue = "133508")] + #[stable(feature = "core_slice_as_array", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "core_slice_as_array", since = "CURRENT_RUSTC_VERSION")] #[inline] #[must_use] pub const fn as_mut_array(&mut self) -> Option<&mut [T; N]> { From 505361dc0b7bc0eab531cf70a7fb8cced36e888e Mon Sep 17 00:00:00 2001 From: Vrtgs Date: Tue, 14 Oct 2025 18:47:50 +0300 Subject: [PATCH 288/358] update isolate_highest_one for NonZero --- core/src/num/nonzero.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/src/num/nonzero.rs b/core/src/num/nonzero.rs index fcdb65bd45c95..cb8838f1eece6 100644 --- a/core/src/num/nonzero.rs +++ b/core/src/num/nonzero.rs @@ -660,12 +660,15 @@ macro_rules! nonzero_integer { without modifying the original"] #[inline(always)] pub const fn isolate_highest_one(self) -> Self { - let n = self.get() & (((1 as $Int) << (<$Int>::BITS - 1)).wrapping_shr(self.leading_zeros())); - // SAFETY: // `self` is non-zero, so masking to preserve only the most // significant set bit will result in a non-zero `n`. - unsafe { NonZero::new_unchecked(n) } + // and self.leading_zeros() is always < $INT::BITS since + // at least one of the bits in the number is not zero + unsafe { + let bit = (((1 as $Uint) << (<$Uint>::BITS - 1)).unchecked_shr(self.leading_zeros())); + NonZero::new_unchecked(bit as $Int) + } } /// Returns `self` with only the least significant bit set. From 26fc2856a611f6d756aaa36efb70ac78e40a3f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Le=C3=B3n=20Orell=20Valerian=20Liehr?= Date: Sat, 8 Nov 2025 06:40:27 +0100 Subject: [PATCH 289/358] Replace `#[const_trait]` with `const` in libcore --- core/src/array/equality.rs | 3 +-- core/src/cmp/bytewise.rs | 3 +-- core/src/ops/range.rs | 9 +++------ core/src/slice/cmp.rs | 15 +++++---------- core/src/slice/index.rs | 3 +-- 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/core/src/array/equality.rs b/core/src/array/equality.rs index c2c7ccf0daa23..ec79a657e58e2 100644 --- a/core/src/array/equality.rs +++ b/core/src/array/equality.rs @@ -132,9 +132,8 @@ where #[rustc_const_unstable(feature = "const_cmp", issue = "143800")] impl const Eq for [T; N] {} -#[const_trait] #[rustc_const_unstable(feature = "const_cmp", issue = "143800")] -trait SpecArrayEq: Sized { +const trait SpecArrayEq: Sized { fn spec_eq(a: &[Self; N], b: &[Other; N]) -> bool; fn spec_ne(a: &[Self; N], b: &[Other; N]) -> bool; } diff --git a/core/src/cmp/bytewise.rs b/core/src/cmp/bytewise.rs index 2265fa7a3531c..f0f5f656405a9 100644 --- a/core/src/cmp/bytewise.rs +++ b/core/src/cmp/bytewise.rs @@ -17,8 +17,7 @@ use crate::num::NonZero; /// - Neither `Self` nor `Rhs` have provenance, so integer comparisons are correct. /// - `>::{eq,ne}` are equivalent to comparing the bytes. #[rustc_specialization_trait] -#[const_trait] // FIXME(const_trait_impl): Migrate to `const unsafe trait` once #146122 is fixed. 
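The behaviour preserved by the `isolate_highest_one` rewrite above, sketched as a runnable check; this API is unstable, so a nightly toolchain is required and the feature-gate name below is an assumption:

```
#![feature(isolate_most_least_significant_one)] // gate name is an assumption

use std::num::NonZero;

fn main() {
    let n = NonZero::new(0b0101_1000_u8).unwrap();
    // Only the most significant set bit survives, and the result is
    // still provably non-zero.
    assert_eq!(n.isolate_highest_one().get(), 0b0100_0000);
}
```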
-pub(crate) unsafe trait BytewiseEq: +pub(crate) const unsafe trait BytewiseEq: [const] PartialEq + Sized { } diff --git a/core/src/ops/range.rs b/core/src/ops/range.rs index d781f3f7ace4a..a0b74ff383ea4 100644 --- a/core/src/ops/range.rs +++ b/core/src/ops/range.rs @@ -816,9 +816,8 @@ impl Bound<&T> { /// by range syntax like `..`, `a..`, `..b`, `..=c`, `d..e`, or `f..=g`. #[stable(feature = "collections_range", since = "1.28.0")] #[rustc_diagnostic_item = "RangeBounds"] -#[const_trait] #[rustc_const_unstable(feature = "const_range", issue = "none")] -pub trait RangeBounds { +pub const trait RangeBounds { /// Start index bound. /// /// Returns the start value as a `Bound`. @@ -954,9 +953,8 @@ pub trait RangeBounds { /// `IntoBounds` is implemented by Rust’s built-in range types, produced /// by range syntax like `..`, `a..`, `..b`, `..=c`, `d..e`, or `f..=g`. #[unstable(feature = "range_into_bounds", issue = "136903")] -#[const_trait] #[rustc_const_unstable(feature = "const_range", issue = "none")] -pub trait IntoBounds: [const] RangeBounds { +pub const trait IntoBounds: [const] RangeBounds { /// Convert this range into the start and end bounds. /// Returns `(start_bound, end_bound)`. /// @@ -1319,9 +1317,8 @@ pub enum OneSidedRangeBound { /// Types that implement `OneSidedRange` must return `Bound::Unbounded` /// from one of `RangeBounds::start_bound` or `RangeBounds::end_bound`. #[unstable(feature = "one_sided_range", issue = "69780")] -#[const_trait] #[rustc_const_unstable(feature = "const_range", issue = "none")] -pub trait OneSidedRange: RangeBounds { +pub const trait OneSidedRange: RangeBounds { /// An internal-only helper function for `split_off` and /// `split_off_mut` that returns the bound of the one-sided range. fn bound(self) -> (OneSidedRangeBound, T); diff --git a/core/src/slice/cmp.rs b/core/src/slice/cmp.rs index 103630aba0f79..fd1ca23fb79c5 100644 --- a/core/src/slice/cmp.rs +++ b/core/src/slice/cmp.rs @@ -155,18 +155,16 @@ where } #[doc(hidden)] -#[const_trait] #[rustc_const_unstable(feature = "const_cmp", issue = "143800")] // intermediate trait for specialization of slice's PartialOrd -trait SlicePartialOrd: Sized { +const trait SlicePartialOrd: Sized { fn partial_compare(left: &[Self], right: &[Self]) -> Option; } #[doc(hidden)] -#[const_trait] #[rustc_const_unstable(feature = "const_cmp", issue = "143800")] // intermediate trait for specialization of slice's PartialOrd chaining methods -trait SliceChain: Sized { +const trait SliceChain: Sized { fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow; fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow; fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow; @@ -244,9 +242,8 @@ impl const SlicePartialOrd for A { } #[rustc_specialization_trait] -#[const_trait] #[rustc_const_unstable(feature = "const_cmp", issue = "143800")] -trait AlwaysApplicableOrd: [const] SliceOrd + [const] Ord {} +const trait AlwaysApplicableOrd: [const] SliceOrd + [const] Ord {} macro_rules! always_applicable_ord { ($([$($p:tt)*] $t:ty,)*) => { @@ -265,10 +262,9 @@ always_applicable_ord! 
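The `const trait` syntax adopted throughout this patch, in a self-contained sketch with hypothetical trait and type names (nightly-only, and the surface syntax is still evolving):

```
#![feature(const_trait_impl)]

// `const trait Foo` replaces the older `#[const_trait] trait Foo`.
const trait Area {
    fn area(&self) -> usize;
}

struct Square(usize);

impl const Area for Square {
    fn area(&self) -> usize {
        self.0 * self.0
    }
}

// Usable in const contexts because the impl is `const`.
const NINE: usize = Square(3).area();

fn main() {
    assert_eq!(NINE, 9);
}
```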
{ } #[doc(hidden)] -#[const_trait] #[rustc_const_unstable(feature = "const_cmp", issue = "143800")] // intermediate trait for specialization of slice's Ord -trait SliceOrd: Sized { +const trait SliceOrd: Sized { fn compare(left: &[Self], right: &[Self]) -> Ordering; } @@ -292,8 +288,7 @@ impl SliceOrd for A { /// * For every `x` and `y` of this type, `Ord(x, y)` must return the same /// value as `Ord::cmp(transmute::<_, u8>(x), transmute::<_, u8>(y))`. #[rustc_specialization_trait] -#[const_trait] -unsafe trait UnsignedBytewiseOrd: [const] Ord {} +const unsafe trait UnsignedBytewiseOrd: [const] Ord {} #[rustc_const_unstable(feature = "const_cmp", issue = "143800")] unsafe impl const UnsignedBytewiseOrd for bool {} diff --git a/core/src/slice/index.rs b/core/src/slice/index.rs index 1a4dcebf36483..d8ed521f44353 100644 --- a/core/src/slice/index.rs +++ b/core/src/slice/index.rs @@ -159,9 +159,8 @@ mod private_slice_index { message = "the type `{T}` cannot be indexed by `{Self}`", label = "slice indices are of type `usize` or ranges of `usize`" )] -#[const_trait] // FIXME(const_trait_impl): Migrate to `const unsafe trait` once #146122 is fixed. #[rustc_const_unstable(feature = "const_index", issue = "143775")] -pub unsafe trait SliceIndex: private_slice_index::Sealed { +pub const unsafe trait SliceIndex: private_slice_index::Sealed { /// The output type returned by methods. #[stable(feature = "slice_get_slice", since = "1.28.0")] type Output: ?Sized; From 88284e9a68e67c8dff25cfb582a92428704aa646 Mon Sep 17 00:00:00 2001 From: joboet Date: Sat, 8 Nov 2025 09:27:30 +0100 Subject: [PATCH 290/358] std: use a non-poisoning `RwLock` for the panic hook --- std/src/panicking.rs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/std/src/panicking.rs b/std/src/panicking.rs index 9af3e5f63ffb4..7efb7ad8ee8b3 100644 --- a/std/src/panicking.rs +++ b/std/src/panicking.rs @@ -22,7 +22,7 @@ use crate::io::try_set_output_capture; use crate::mem::{self, ManuallyDrop}; use crate::panic::{BacktraceStyle, PanicHookInfo}; use crate::sync::atomic::{Atomic, AtomicBool, Ordering}; -use crate::sync::{PoisonError, RwLock}; +use crate::sync::nonpoison::RwLock; use crate::sys::backtrace; use crate::sys::stdio::panic_output; use crate::{fmt, intrinsics, process, thread}; @@ -144,13 +144,9 @@ pub fn set_hook(hook: Box) + 'static + Sync + Send>) { panic!("cannot modify the panic hook from a panicking thread"); } - let new = Hook::Custom(hook); - let mut hook = HOOK.write().unwrap_or_else(PoisonError::into_inner); - let old = mem::replace(&mut *hook, new); - drop(hook); - // Only drop the old hook after releasing the lock to avoid deadlocking - // if its destructor panics. - drop(old); + // Drop the old hook after changing the hook to avoid deadlocking if its + // destructor panics. + drop(HOOK.replace(Hook::Custom(hook))); } /// Unregisters the current panic hook and returns it, registering the default hook @@ -188,11 +184,7 @@ pub fn take_hook() -> Box) + 'static + Sync + Send> { panic!("cannot modify the panic hook from a panicking thread"); } - let mut hook = HOOK.write().unwrap_or_else(PoisonError::into_inner); - let old_hook = mem::take(&mut *hook); - drop(hook); - - old_hook.into_box() + HOOK.replace(Hook::Default).into_box() } /// Atomic combination of [`take_hook`] and [`set_hook`]. 
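From the outside, the hook-registration API being reworked here is used in the usual way; nothing below depends on the internal lock type:

```
use std::panic;

fn main() {
    // Chain onto whatever hook is currently installed.
    let previous = panic::take_hook();
    panic::set_hook(Box::new(move |info| {
        eprintln!("custom panic handler: {info}");
        previous(info);
    }));
}
```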
Use this to replace the panic handler with @@ -238,7 +230,7 @@ where panic!("cannot modify the panic hook from a panicking thread"); } - let mut hook = HOOK.write().unwrap_or_else(PoisonError::into_inner); + let mut hook = HOOK.write(); let prev = mem::take(&mut *hook).into_box(); *hook = Hook::Custom(Box::new(move |info| hook_fn(&prev, info))); } @@ -822,7 +814,7 @@ fn panic_with_hook( crate::process::abort(); } - match *HOOK.read().unwrap_or_else(PoisonError::into_inner) { + match *HOOK.read() { // Some platforms (like wasm) know that printing to stderr won't ever actually // print anything, and if that's the case we can skip the default // hook. Since string formatting happens lazily when calling `payload` From a68a1bbe53651ffd2b62fa82ecf9c2b9a29ff91e Mon Sep 17 00:00:00 2001 From: joboet Date: Sat, 8 Nov 2025 15:43:57 +0100 Subject: [PATCH 291/358] std: support `RwLock` and thread parking on TEEOS --- std/src/sys/sync/rwlock/mod.rs | 5 +-- std/src/sys/sync/rwlock/teeos.rs | 50 -------------------------- std/src/sys/sync/thread_parking/mod.rs | 5 ++- 3 files changed, 5 insertions(+), 55 deletions(-) delete mode 100644 std/src/sys/sync/rwlock/teeos.rs diff --git a/std/src/sys/sync/rwlock/mod.rs b/std/src/sys/sync/rwlock/mod.rs index ab5715bf2de33..8603fca2da5b5 100644 --- a/std/src/sys/sync/rwlock/mod.rs +++ b/std/src/sys/sync/rwlock/mod.rs @@ -19,6 +19,7 @@ cfg_select! { all(target_os = "windows", target_vendor = "win7"), all(target_vendor = "fortanix", target_env = "sgx"), target_os = "xous", + target_os = "teeos", ) => { mod queue; pub use queue::RwLock; @@ -27,10 +28,6 @@ cfg_select! { mod solid; pub use solid::RwLock; } - target_os = "teeos" => { - mod teeos; - pub use teeos::RwLock; - } _ => { mod no_threads; pub use no_threads::RwLock; diff --git a/std/src/sys/sync/rwlock/teeos.rs b/std/src/sys/sync/rwlock/teeos.rs deleted file mode 100644 index 4a71a3abc2729..0000000000000 --- a/std/src/sys/sync/rwlock/teeos.rs +++ /dev/null @@ -1,50 +0,0 @@ -use crate::sys::sync::mutex::Mutex; - -/// we do not supported rwlock, so use mutex to simulate rwlock. -/// it's useful because so many code in std will use rwlock. -pub struct RwLock { - inner: Mutex, -} - -impl RwLock { - #[inline] - pub const fn new() -> RwLock { - RwLock { inner: Mutex::new() } - } - - #[inline] - pub fn read(&self) { - self.inner.lock() - } - - #[inline] - pub fn try_read(&self) -> bool { - self.inner.try_lock() - } - - #[inline] - pub fn write(&self) { - self.inner.lock() - } - - #[inline] - pub unsafe fn try_write(&self) -> bool { - self.inner.try_lock() - } - - #[inline] - pub unsafe fn read_unlock(&self) { - unsafe { self.inner.unlock() }; - } - - #[inline] - pub unsafe fn write_unlock(&self) { - unsafe { self.inner.unlock() }; - } - - #[inline] - pub unsafe fn downgrade(&self) { - // Since there is no difference between read-locked and write-locked on this platform, this - // function is simply a no-op as only 1 reader can read: the original writer. - } -} diff --git a/std/src/sys/sync/thread_parking/mod.rs b/std/src/sys/sync/thread_parking/mod.rs index e8a9dc884f816..74b5b72b19a75 100644 --- a/std/src/sys/sync/thread_parking/mod.rs +++ b/std/src/sys/sync/thread_parking/mod.rs @@ -35,7 +35,10 @@ cfg_select! 
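The user-facing behaviour these platform shims back is the stable thread-parking API; a standard usage sketch:

```
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::thread;

fn main() {
    let ready = Arc::new(AtomicBool::new(false));
    let flag = Arc::clone(&ready);

    let worker = thread::spawn(move || {
        // Park until the main thread signals readiness; the loop
        // tolerates spurious wakeups.
        while !flag.load(Ordering::Acquire) {
            thread::park();
        }
    });

    ready.store(true, Ordering::Release);
    worker.thread().unpark();
    worker.join().unwrap();
}
```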
{ mod xous; pub use xous::Parker; } - target_family = "unix" => { + any( + target_family = "unix", + target_os = "teeos", + ) => { mod pthread; pub use pthread::Parker; } From 0c98a31881b63f235edb4c6bd9c467725eab723c Mon Sep 17 00:00:00 2001 From: Peter Jaszkowiak Date: Sat, 3 Aug 2024 22:57:10 -0600 Subject: [PATCH 292/358] add `overflow_checks` intrinsic --- core/src/intrinsics/mod.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/core/src/intrinsics/mod.rs b/core/src/intrinsics/mod.rs index c397e762d5589..41afb3694b912 100644 --- a/core/src/intrinsics/mod.rs +++ b/core/src/intrinsics/mod.rs @@ -2585,6 +2585,24 @@ pub const fn ub_checks() -> bool { cfg!(ub_checks) } +/// Returns whether we should perform some overflow-checking at runtime. This eventually evaluates to +/// `cfg!(overflow_checks)`, but behaves different from `cfg!` when mixing crates built with different +/// flags: if the crate has overflow checks enabled or carries the `#[rustc_inherit_overflow_checks]` +/// attribute, evaluation is delayed until monomorphization (or until the call gets inlined into +/// a crate that does not delay evaluation further); otherwise it can happen any time. +/// +/// The common case here is a user program built with overflow_checks linked against the distributed +/// sysroot which is built without overflow_checks but with `#[rustc_inherit_overflow_checks]`. +/// For code that gets monomorphized in the user crate (i.e., generic functions and functions with +/// `#[inline]`), gating assertions on `overflow_checks()` rather than `cfg!(overflow_checks)` means that +/// assertions are enabled whenever the *user crate* has overflow checks enabled. However if the +/// user has overflow checks disabled, the checks will still get optimized out. +#[inline(always)] +#[rustc_intrinsic] +pub const fn overflow_checks() -> bool { + cfg!(debug_assertions) +} + /// Allocates a block of memory at compile time. /// At runtime, just returns a null pointer. /// From d51276d5dd1aa0b251887738ba41578d3c18ca5a Mon Sep 17 00:00:00 2001 From: bendn Date: Fri, 31 Oct 2025 23:00:02 +0700 Subject: [PATCH 293/358] constify result unwrap unchecked --- core/src/result.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/core/src/result.rs b/core/src/result.rs index 6fee7febde38d..9afa71ec0f117 100644 --- a/core/src/result.rs +++ b/core/src/result.rs @@ -1646,11 +1646,16 @@ impl Result { #[inline] #[track_caller] #[stable(feature = "option_result_unwrap_unchecked", since = "1.58.0")] - pub unsafe fn unwrap_unchecked(self) -> T { + #[rustc_const_unstable(feature = "const_result_unwrap_unchecked", issue = "148714")] + pub const unsafe fn unwrap_unchecked(self) -> T { match self { Ok(t) => t, - // SAFETY: the safety contract must be upheld by the caller. - Err(_) => unsafe { hint::unreachable_unchecked() }, + Err(e) => { + // FIXME(const-hack): to avoid E: const Destruct bound + super::mem::forget(e); + // SAFETY: the safety contract must be upheld by the caller. 
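What the constified `unwrap_unchecked` in this hunk enables, as a sketch (nightly-only; the gate name is taken from the diff):

```
#![feature(const_result_unwrap_unchecked)]

const VALUE: u32 = {
    let r: Result<u32, ()> = Ok(7);
    // SAFETY: `r` is `Ok` by construction.
    unsafe { r.unwrap_unchecked() }
};

fn main() {
    assert_eq!(VALUE, 7);
}
```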
+ unsafe { hint::unreachable_unchecked() } + } } } From fe3f52a7e53fa3e998ff59d6e61a2633d8b5403c Mon Sep 17 00:00:00 2001 From: nxsaken Date: Thu, 30 Oct 2025 13:59:39 +0400 Subject: [PATCH 294/358] Constify `ControlFlow` methods (unstable bounds) --- core/src/ops/control_flow.rs | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/core/src/ops/control_flow.rs b/core/src/ops/control_flow.rs index b760a7c4e21eb..8eff1633c3b40 100644 --- a/core/src/ops/control_flow.rs +++ b/core/src/ops/control_flow.rs @@ -1,3 +1,4 @@ +use crate::marker::Destruct; use crate::{convert, ops}; /// Used to tell an operation whether it should exit early or go on as usual. @@ -183,7 +184,11 @@ impl ControlFlow { /// ``` #[inline] #[stable(feature = "control_flow_enum", since = "1.83.0")] - pub fn break_value(self) -> Option { + #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + pub const fn break_value(self) -> Option + where + Self: [const] Destruct, + { match self { ControlFlow::Continue(..) => None, ControlFlow::Break(x) => Some(x), @@ -268,7 +273,11 @@ impl ControlFlow { /// to the break value in case it exists. #[inline] #[stable(feature = "control_flow_enum", since = "1.83.0")] - pub fn map_break(self, f: impl FnOnce(B) -> T) -> ControlFlow { + #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + pub const fn map_break(self, f: F) -> ControlFlow + where + F: [const] FnOnce(B) -> T + [const] Destruct, + { match self { ControlFlow::Continue(x) => ControlFlow::Continue(x), ControlFlow::Break(x) => ControlFlow::Break(f(x)), @@ -288,7 +297,11 @@ impl ControlFlow { /// ``` #[inline] #[stable(feature = "control_flow_enum", since = "1.83.0")] - pub fn continue_value(self) -> Option { + #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + pub const fn continue_value(self) -> Option + where + Self: [const] Destruct, + { match self { ControlFlow::Continue(x) => Some(x), ControlFlow::Break(..) => None, @@ -372,7 +385,11 @@ impl ControlFlow { /// to the continue value in case it exists. #[inline] #[stable(feature = "control_flow_enum", since = "1.83.0")] - pub fn map_continue(self, f: impl FnOnce(C) -> T) -> ControlFlow { + #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + pub const fn map_continue(self, f: F) -> ControlFlow + where + F: [const] FnOnce(C) -> T + [const] Destruct, + { match self { ControlFlow::Continue(x) => ControlFlow::Continue(f(x)), ControlFlow::Break(x) => ControlFlow::Break(x), From d61ce24b6ee414c4949ad1465a8736c8ca975033 Mon Sep 17 00:00:00 2001 From: nxsaken Date: Sun, 9 Nov 2025 13:33:47 +0400 Subject: [PATCH 295/358] Add tracking issue number --- core/src/ops/control_flow.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/ops/control_flow.rs b/core/src/ops/control_flow.rs index 8eff1633c3b40..bc497db352021 100644 --- a/core/src/ops/control_flow.rs +++ b/core/src/ops/control_flow.rs @@ -184,7 +184,7 @@ impl ControlFlow { /// ``` #[inline] #[stable(feature = "control_flow_enum", since = "1.83.0")] - #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + #[rustc_const_unstable(feature = "const_control_flow", issue = "148739")] pub const fn break_value(self) -> Option where Self: [const] Destruct, @@ -273,7 +273,7 @@ impl ControlFlow { /// to the break value in case it exists. 
#[inline] #[stable(feature = "control_flow_enum", since = "1.83.0")] - #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + #[rustc_const_unstable(feature = "const_control_flow", issue = "148739")] pub const fn map_break(self, f: F) -> ControlFlow where F: [const] FnOnce(B) -> T + [const] Destruct, @@ -297,7 +297,7 @@ impl ControlFlow { /// ``` #[inline] #[stable(feature = "control_flow_enum", since = "1.83.0")] - #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + #[rustc_const_unstable(feature = "const_control_flow", issue = "148739")] pub const fn continue_value(self) -> Option where Self: [const] Destruct, @@ -385,7 +385,7 @@ impl ControlFlow { /// to the continue value in case it exists. #[inline] #[stable(feature = "control_flow_enum", since = "1.83.0")] - #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + #[rustc_const_unstable(feature = "const_control_flow", issue = "148739")] pub const fn map_continue(self, f: F) -> ControlFlow where F: [const] FnOnce(C) -> T + [const] Destruct, From a3d7eec25f6c1053e6cd8e0e627292148f260d34 Mon Sep 17 00:00:00 2001 From: nxsaken Date: Sun, 9 Nov 2025 13:36:35 +0400 Subject: [PATCH 296/358] Update feature name, add tracking issue number --- core/src/ops/control_flow.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/ops/control_flow.rs b/core/src/ops/control_flow.rs index e426ff7cc057a..1061ac082c02d 100644 --- a/core/src/ops/control_flow.rs +++ b/core/src/ops/control_flow.rs @@ -150,7 +150,7 @@ impl ControlFlow { /// ``` #[inline] #[stable(feature = "control_flow_enum_is", since = "1.59.0")] - #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + #[rustc_const_unstable(feature = "min_const_control_flow", issue = "148738")] pub const fn is_break(&self) -> bool { matches!(*self, ControlFlow::Break(_)) } @@ -167,7 +167,7 @@ impl ControlFlow { /// ``` #[inline] #[stable(feature = "control_flow_enum_is", since = "1.59.0")] - #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + #[rustc_const_unstable(feature = "min_const_control_flow", issue = "148738")] pub const fn is_continue(&self) -> bool { matches!(*self, ControlFlow::Continue(_)) } @@ -259,7 +259,7 @@ impl ControlFlow { /// ``` #[inline] #[unstable(feature = "control_flow_ok", issue = "140266")] - #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + #[rustc_const_unstable(feature = "min_const_control_flow", issue = "148738")] pub const fn break_ok(self) -> Result { match self { ControlFlow::Continue(c) => Err(c), @@ -364,7 +364,7 @@ impl ControlFlow { /// ``` #[inline] #[unstable(feature = "control_flow_ok", issue = "140266")] - #[rustc_const_unstable(feature = "const_control_flow", issue = "none")] + #[rustc_const_unstable(feature = "min_const_control_flow", issue = "148738")] pub const fn continue_ok(self) -> Result { match self { ControlFlow::Continue(c) => Ok(c), From 7830d741301333322292cdbb2c6b5c051e204642 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Sat, 19 Mar 2022 03:59:55 -0700 Subject: [PATCH 297/358] Implement the alternative `try` desugaring --- core/src/ops/try_trait.rs | 15 ++++++++++++--- std/src/sys/pal/unix/kernel_copy/tests.rs | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/core/src/ops/try_trait.rs b/core/src/ops/try_trait.rs index e1f2ebcf4c289..374e59e5d75c0 100644 --- a/core/src/ops/try_trait.rs +++ b/core/src/ops/try_trait.rs @@ -359,11 +359,20 @@ where /// and in the other direction, 
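A sketch of what the `min_const_control_flow` gate named above permits (nightly-only):

```
#![feature(min_const_control_flow)]

use std::ops::ControlFlow;

const EARLY_EXIT: bool = ControlFlow::<u8, ()>::Break(1).is_break();

fn main() {
    assert!(EARLY_EXIT);
}
```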
/// ` as Residual>::TryType = Result`. #[unstable(feature = "try_trait_v2_residual", issue = "91285")] -#[rustc_const_unstable(feature = "const_try", issue = "74935")] -pub const trait Residual { +#[rustc_const_unstable(feature = "const_try_residual", issue = "91285")] +pub const trait Residual: Sized { /// The "return" type of this meta-function. #[unstable(feature = "try_trait_v2_residual", issue = "91285")] - type TryType: Try; + type TryType: [const] Try; + + /// Here for convenience in the `?` desugaring. + /// Probably should not be stabilized, as it should never be overridden. + /// (without a `final fn` of some form, cc RFC#3678) + #[unstable(feature = "try_trait_v2_residual", issue = "91285")] + #[lang = "into_try_type"] + fn into_try_type(self) -> Self::TryType { + FromResidual::from_residual(self) + } } #[unstable(feature = "pub_crate_should_not_need_unstable_attr", issue = "none")] diff --git a/std/src/sys/pal/unix/kernel_copy/tests.rs b/std/src/sys/pal/unix/kernel_copy/tests.rs index 54d8f8ed2edd4..15dee768d928b 100644 --- a/std/src/sys/pal/unix/kernel_copy/tests.rs +++ b/std/src/sys/pal/unix/kernel_copy/tests.rs @@ -50,7 +50,7 @@ fn copy_specialization() -> Result<()> { "inner Take allowed reading beyond end of file, some bytes should be left" ); - let mut sink = sink.into_inner()?; + let mut sink = sink.into_inner().map_err(io::Error::from)?; sink.seek(SeekFrom::Start(0))?; let mut copied = Vec::new(); sink.read_to_end(&mut copied)?; From 335eb90458f80f12c59f854d6ac95d9bd761a5fb Mon Sep 17 00:00:00 2001 From: joboet Date: Sat, 11 Jan 2025 21:22:01 +0100 Subject: [PATCH 298/358] (almost) get rid of the unsound `#[rustc_unsafe_specialization_marker]` on `Copy`, introduce `TrivialClone` --- alloc/src/boxed/convert.rs | 6 ++- alloc/src/collections/vec_deque/mod.rs | 4 +- alloc/src/lib.rs | 1 + alloc/src/rc.rs | 11 +++-- alloc/src/slice.rs | 6 ++- alloc/src/sync.rs | 9 +++- alloc/src/vec/mod.rs | 8 ++-- alloc/src/vec/spec_extend.rs | 3 +- alloctests/lib.rs | 1 + core/src/array/mod.rs | 12 +++-- core/src/clone.rs | 48 +++++++++++++++++++ core/src/clone/uninit.rs | 5 +- core/src/marker.rs | 12 ++--- core/src/marker/variance.rs | 3 ++ core/src/mem/maybe_uninit.rs | 13 ++++- core/src/mem/mod.rs | 4 ++ core/src/num/nonzero.rs | 5 +- core/src/option.rs | 5 ++ core/src/ptr/metadata.rs | 3 ++ core/src/ptr/non_null.rs | 4 ++ core/src/ptr/unique.rs | 4 ++ core/src/slice/mod.rs | 66 ++++++++++++++++---------- core/src/slice/specialize.rs | 9 +++- core/src/sync/exclusive.rs | 4 ++ 24 files changed, 190 insertions(+), 56 deletions(-) diff --git a/alloc/src/boxed/convert.rs b/alloc/src/boxed/convert.rs index 45c46fb526365..73940db5d2f50 100644 --- a/alloc/src/boxed/convert.rs +++ b/alloc/src/boxed/convert.rs @@ -1,4 +1,6 @@ use core::any::Any; +#[cfg(not(no_global_oom_handling))] +use core::clone::TrivialClone; use core::error::Error; use core::mem; use core::pin::Pin; @@ -75,11 +77,13 @@ impl BoxFromSlice for Box<[T]> { } #[cfg(not(no_global_oom_handling))] -impl BoxFromSlice for Box<[T]> { +impl BoxFromSlice for Box<[T]> { #[inline] fn from_slice(slice: &[T]) -> Self { let len = slice.len(); let buf = RawVec::with_capacity(len); + // SAFETY: since `T` implements `TrivialClone`, this is sound and + // equivalent to the above. 
unsafe { ptr::copy_nonoverlapping(slice.as_ptr(), buf.ptr(), len); buf.into_box(slice.len()).assume_init() diff --git a/alloc/src/collections/vec_deque/mod.rs b/alloc/src/collections/vec_deque/mod.rs index 1d4b3b558c010..78930364a9260 100644 --- a/alloc/src/collections/vec_deque/mod.rs +++ b/alloc/src/collections/vec_deque/mod.rs @@ -7,6 +7,8 @@ #![stable(feature = "rust1", since = "1.0.0")] +#[cfg(not(no_global_oom_handling))] +use core::clone::TrivialClone; use core::cmp::{self, Ordering}; use core::hash::{Hash, Hasher}; use core::iter::{ByRefSized, repeat_n, repeat_with}; @@ -3419,7 +3421,7 @@ impl SpecExtendFromWithin for VecDeque { } #[cfg(not(no_global_oom_handling))] -impl SpecExtendFromWithin for VecDeque { +impl SpecExtendFromWithin for VecDeque { unsafe fn spec_extend_from_within(&mut self, src: Range) { let dst = self.len(); let count = src.end - src.start; diff --git a/alloc/src/lib.rs b/alloc/src/lib.rs index 73197d021f1a3..666ae27fb8634 100644 --- a/alloc/src/lib.rs +++ b/alloc/src/lib.rs @@ -146,6 +146,7 @@ #![feature(std_internals)] #![feature(str_internals)] #![feature(temporary_niche_types)] +#![feature(trivial_clone)] #![feature(trusted_fused)] #![feature(trusted_len)] #![feature(trusted_random_access)] diff --git a/alloc/src/rc.rs b/alloc/src/rc.rs index f0ce6aa03a8ba..581c714998f94 100644 --- a/alloc/src/rc.rs +++ b/alloc/src/rc.rs @@ -243,9 +243,9 @@ use core::any::Any; use core::cell::{Cell, CloneFromCell}; -#[cfg(not(no_global_oom_handling))] -use core::clone::CloneToUninit; use core::clone::UseCloned; +#[cfg(not(no_global_oom_handling))] +use core::clone::{CloneToUninit, TrivialClone}; use core::cmp::Ordering; use core::hash::{Hash, Hasher}; use core::intrinsics::abort; @@ -2224,7 +2224,8 @@ impl Rc<[T]> { /// Copy elements from slice into newly allocated `Rc<[T]>` /// - /// Unsafe because the caller must either take ownership or bind `T: Copy` + /// Unsafe because the caller must either take ownership, bind `T: Copy` or + /// bind `T: TrivialClone`. #[cfg(not(no_global_oom_handling))] unsafe fn copy_from_slice(v: &[T]) -> Rc<[T]> { unsafe { @@ -2314,9 +2315,11 @@ impl RcFromSlice for Rc<[T]> { } #[cfg(not(no_global_oom_handling))] -impl RcFromSlice for Rc<[T]> { +impl RcFromSlice for Rc<[T]> { #[inline] fn from_slice(v: &[T]) -> Self { + // SAFETY: `T` implements `TrivialClone`, so this is sound and equivalent + // to the above. 
unsafe { Rc::copy_from_slice(v) } } } diff --git a/alloc/src/slice.rs b/alloc/src/slice.rs index ce9f967cc387a..a83b51ccb60c3 100644 --- a/alloc/src/slice.rs +++ b/alloc/src/slice.rs @@ -11,6 +11,8 @@ use core::borrow::{Borrow, BorrowMut}; #[cfg(not(no_global_oom_handling))] +use core::clone::TrivialClone; +#[cfg(not(no_global_oom_handling))] use core::cmp::Ordering::{self, Less}; #[cfg(not(no_global_oom_handling))] use core::mem::MaybeUninit; @@ -439,7 +441,7 @@ impl [T] { } } - impl ConvertVec for T { + impl ConvertVec for T { #[inline] fn to_vec(s: &[Self], alloc: A) -> Vec { let mut v = Vec::with_capacity_in(s.len(), alloc); @@ -822,7 +824,7 @@ impl SpecCloneIntoVec for [T] { } #[cfg(not(no_global_oom_handling))] -impl SpecCloneIntoVec for [T] { +impl SpecCloneIntoVec for [T] { fn clone_into(&self, target: &mut Vec) { target.clear(); target.extend_from_slice(self); diff --git a/alloc/src/sync.rs b/alloc/src/sync.rs index b85293973fd5c..6fb8df0f8c208 100644 --- a/alloc/src/sync.rs +++ b/alloc/src/sync.rs @@ -12,6 +12,8 @@ use core::any::Any; use core::cell::CloneFromCell; #[cfg(not(no_global_oom_handling))] use core::clone::CloneToUninit; +#[cfg(not(no_global_oom_handling))] +use core::clone::TrivialClone; use core::clone::UseCloned; use core::cmp::Ordering; use core::hash::{Hash, Hasher}; @@ -2156,7 +2158,8 @@ impl Arc<[T]> { /// Copy elements from slice into newly allocated `Arc<[T]>` /// - /// Unsafe because the caller must either take ownership or bind `T: Copy`. + /// Unsafe because the caller must either take ownership, bind `T: Copy` or + /// bind `T: TrivialClone`. #[cfg(not(no_global_oom_handling))] unsafe fn copy_from_slice(v: &[T]) -> Arc<[T]> { unsafe { @@ -2248,9 +2251,11 @@ impl ArcFromSlice for Arc<[T]> { } #[cfg(not(no_global_oom_handling))] -impl ArcFromSlice for Arc<[T]> { +impl ArcFromSlice for Arc<[T]> { #[inline] fn from_slice(v: &[T]) -> Self { + // SAFETY: `T` implements `TrivialClone`, so this is sound and equivalent + // to the above. unsafe { Arc::copy_from_slice(v) } } } diff --git a/alloc/src/vec/mod.rs b/alloc/src/vec/mod.rs index dc610d7b46741..43a68ff203738 100644 --- a/alloc/src/vec/mod.rs +++ b/alloc/src/vec/mod.rs @@ -73,6 +73,8 @@ #![stable(feature = "rust1", since = "1.0.0")] +#[cfg(not(no_global_oom_handling))] +use core::clone::TrivialClone; #[cfg(not(no_global_oom_handling))] use core::cmp; use core::cmp::Ordering; @@ -3494,7 +3496,7 @@ impl ExtendFromWithinSpec for Vec { } #[cfg(not(no_global_oom_handling))] -impl ExtendFromWithinSpec for Vec { +impl ExtendFromWithinSpec for Vec { unsafe fn spec_extend_from_within(&mut self, src: Range) { let count = src.len(); { @@ -3507,8 +3509,8 @@ impl ExtendFromWithinSpec for Vec { // SAFETY: // - Both pointers are created from unique slice references (`&mut [_]`) // so they are valid and do not overlap. - // - Elements are :Copy so it's OK to copy them, without doing - // anything with the original values + // - Elements implement `TrivialClone` so this is equivalent to calling + // `clone` on every one of them. 
// - `count` is equal to the len of `source`, so source is valid for // `count` reads // - `.reserve(count)` guarantees that `spare.len() >= count` so spare diff --git a/alloc/src/vec/spec_extend.rs b/alloc/src/vec/spec_extend.rs index 7085bceef5baa..f5bcd3ec9d82d 100644 --- a/alloc/src/vec/spec_extend.rs +++ b/alloc/src/vec/spec_extend.rs @@ -1,3 +1,4 @@ +use core::clone::TrivialClone; use core::iter::TrustedLen; use core::slice::{self}; @@ -48,7 +49,7 @@ where impl<'a, T: 'a, A: Allocator> SpecExtend<&'a T, slice::Iter<'a, T>> for Vec where - T: Copy, + T: TrivialClone, { fn spec_extend(&mut self, iterator: slice::Iter<'a, T>) { let slice = iterator.as_slice(); diff --git a/alloctests/lib.rs b/alloctests/lib.rs index efdcb893bfeef..665bc27047948 100644 --- a/alloctests/lib.rs +++ b/alloctests/lib.rs @@ -40,6 +40,7 @@ #![feature(slice_range)] #![feature(std_internals)] #![feature(temporary_niche_types)] +#![feature(trivial_clone)] #![feature(trusted_fused)] #![feature(trusted_len)] #![feature(trusted_random_access)] diff --git a/core/src/array/mod.rs b/core/src/array/mod.rs index 0dc10758a8560..68bc7eb6f811e 100644 --- a/core/src/array/mod.rs +++ b/core/src/array/mod.rs @@ -5,10 +5,10 @@ #![stable(feature = "core_array", since = "1.35.0")] use crate::borrow::{Borrow, BorrowMut}; +use crate::clone::TrivialClone; use crate::cmp::Ordering; use crate::convert::Infallible; use crate::error::Error; -use crate::fmt; use crate::hash::{self, Hash}; use crate::intrinsics::transmute_unchecked; use crate::iter::{UncheckedIterator, repeat_n}; @@ -18,6 +18,7 @@ use crate::ops::{ }; use crate::ptr::{null, null_mut}; use crate::slice::{Iter, IterMut}; +use crate::{fmt, ptr}; mod ascii; mod drain; @@ -451,6 +452,9 @@ impl Clone for [T; N] { } } +#[unstable(feature = "trivial_clone", issue = "none")] +unsafe impl TrivialClone for [T; N] {} + trait SpecArrayClone: Clone { fn clone(array: &[Self; N]) -> [Self; N]; } @@ -462,10 +466,12 @@ impl SpecArrayClone for T { } } -impl SpecArrayClone for T { +impl SpecArrayClone for T { #[inline] fn clone(array: &[T; N]) -> [T; N] { - *array + // SAFETY: `TrivialClone` implies that this is equivalent to calling + // `Clone` on every element. + unsafe { ptr::read(array) } } } diff --git a/core/src/clone.rs b/core/src/clone.rs index 06d2c93cc698f..3cf8b51d50e67 100644 --- a/core/src/clone.rs +++ b/core/src/clone.rs @@ -250,6 +250,33 @@ pub const trait Clone: Sized { } } +/// Indicates that the `Clone` implementation is identical to copying the value. +/// +/// This is used for some optimizations in the standard library, which specializes +/// on this trait to select faster implementations of functions such as +/// [`clone_from_slice`](slice::clone_from_slice). It is automatically implemented +/// when using `#[derive(Clone, Copy)]`. +/// +/// Note that this trait does not imply that the type is `Copy`, because e.g. +/// `core::ops::Range` could soundly implement this trait. +/// +/// # Safety +/// `Clone::clone` must be equivalent to copying the value, otherwise calling functions +/// such as `slice::clone_from_slice` can have undefined behaviour. +#[unstable( + feature = "trivial_clone", + reason = "this isn't part of any API guarantee", + issue = "none" +)] +#[rustc_const_unstable(feature = "const_clone", issue = "142757")] +// SAFETY: +// It is sound to specialize on this because the `clone` implementation cannot be +// lifetime-dependent. 
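The optimization `TrivialClone` unlocks, seen from the user side; observable behaviour is unchanged, only the lowering differs:

```
#[derive(Clone, Copy)]
struct Rgb(u8, u8, u8);

fn main() {
    let src = [Rgb(1, 2, 3); 64];
    let mut dst = [Rgb(0, 0, 0); 64];
    // Semantically an element-wise `clone`; with the specialization on
    // `TrivialClone`, this lowers to a single `memcpy`.
    dst.clone_from_slice(&src);
    assert_eq!(dst[0].0, 1);
}
```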
Therefore, if `TrivialClone` is implemented for any lifetime, +// its invariant holds whenever `Clone` is implemented, even if the actual +// `TrivialClone` bound would not be satisfied because of lifetime bounds. +#[rustc_unsafe_specialization_marker] +pub const unsafe trait TrivialClone: [const] Clone {} + /// Derive macro generating an impl of the trait `Clone`. #[rustc_builtin_macro] #[stable(feature = "builtin_macro_prelude", since = "1.38.0")] @@ -569,6 +596,7 @@ unsafe impl CloneToUninit for crate::bstr::ByteStr { /// are implemented in `traits::SelectionContext::copy_clone_conditions()` /// in `rustc_trait_selection`. mod impls { + use super::TrivialClone; use crate::marker::PointeeSized; macro_rules! impl_clone { @@ -582,6 +610,10 @@ mod impls { *self } } + + #[unstable(feature = "trivial_clone", issue = "none")] + #[rustc_const_unstable(feature = "const_clone", issue = "142757")] + unsafe impl const TrivialClone for $t {} )* } } @@ -602,6 +634,10 @@ mod impls { } } + #[unstable(feature = "trivial_clone", issue = "none")] + #[rustc_const_unstable(feature = "const_clone", issue = "142757")] + unsafe impl const TrivialClone for ! {} + #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_unstable(feature = "const_clone", issue = "142757")] impl const Clone for *const T { @@ -611,6 +647,10 @@ mod impls { } } + #[unstable(feature = "trivial_clone", issue = "none")] + #[rustc_const_unstable(feature = "const_clone", issue = "142757")] + unsafe impl const TrivialClone for *const T {} + #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_unstable(feature = "const_clone", issue = "142757")] impl const Clone for *mut T { @@ -620,6 +660,10 @@ mod impls { } } + #[unstable(feature = "trivial_clone", issue = "none")] + #[rustc_const_unstable(feature = "const_clone", issue = "142757")] + unsafe impl const TrivialClone for *mut T {} + /// Shared references can be cloned, but mutable references *cannot*! #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_unstable(feature = "const_clone", issue = "142757")] @@ -631,6 +675,10 @@ mod impls { } } + #[unstable(feature = "trivial_clone", issue = "none")] + #[rustc_const_unstable(feature = "const_clone", issue = "142757")] + unsafe impl const TrivialClone for &T {} + /// Shared references can be cloned, but mutable references *cannot*! #[stable(feature = "rust1", since = "1.0.0")] impl !Clone for &mut T {} diff --git a/core/src/clone/uninit.rs b/core/src/clone/uninit.rs index 8b738bec796de..8d1185067eb88 100644 --- a/core/src/clone/uninit.rs +++ b/core/src/clone/uninit.rs @@ -1,3 +1,4 @@ +use super::TrivialClone; use crate::mem::{self, MaybeUninit}; use crate::ptr; @@ -49,9 +50,9 @@ unsafe impl CopySpec for T { } } -// Specialized implementation for types that are [`Copy`], not just [`Clone`], +// Specialized implementation for types that are [`TrivialClone`], not just [`Clone`], // and can therefore be copied bitwise. 
-unsafe impl CopySpec for T { +unsafe impl CopySpec for T { #[inline] unsafe fn clone_one(src: &Self, dst: *mut Self) { // SAFETY: The safety conditions of clone_to_uninit() are a superset of those of diff --git a/core/src/marker.rs b/core/src/marker.rs index 5fd0611a18434..64a403ce8cdbb 100644 --- a/core/src/marker.rs +++ b/core/src/marker.rs @@ -14,6 +14,7 @@ pub use self::variance::{ PhantomInvariant, PhantomInvariantLifetime, Variance, variance, }; use crate::cell::UnsafeCell; +use crate::clone::TrivialClone; use crate::cmp; use crate::fmt::Debug; use crate::hash::{Hash, Hasher}; @@ -454,12 +455,8 @@ marker_impls! { /// [impls]: #implementors #[stable(feature = "rust1", since = "1.0.0")] #[lang = "copy"] -// FIXME(matthewjasper) This allows copying a type that doesn't implement -// `Copy` because of unsatisfied lifetime bounds (copying `A<'_>` when only -// `A<'static>: Copy` and `A<'_>: Clone`). -// We have this attribute here for now only because there are quite a few -// existing specializations on `Copy` that already exist in the standard -// library, and there's no way to safely have this behavior right now. +// This is unsound, but required by `hashbrown` +// FIXME(joboet): change `hashbrown` to use `TrivialClone` #[rustc_unsafe_specialization_marker] #[rustc_diagnostic_item = "Copy"] pub trait Copy: Clone { @@ -861,6 +858,9 @@ impl Clone for PhantomData { } } +#[unstable(feature = "trivial_clone", issue = "none")] +unsafe impl TrivialClone for PhantomData {} + #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_unstable(feature = "const_default", issue = "143894")] impl const Default for PhantomData { diff --git a/core/src/marker/variance.rs b/core/src/marker/variance.rs index 55fdacb014e67..860d28262caed 100644 --- a/core/src/marker/variance.rs +++ b/core/src/marker/variance.rs @@ -2,6 +2,7 @@ use super::PhantomData; use crate::any::type_name; +use crate::clone::TrivialClone; use crate::cmp::Ordering; use crate::fmt; use crate::hash::{Hash, Hasher}; @@ -60,6 +61,8 @@ macro_rules! phantom_type { impl Copy for $name where T: ?Sized {} + unsafe impl TrivialClone for $name where T: ?Sized {} + impl PartialEq for $name where T: ?Sized { diff --git a/core/src/mem/maybe_uninit.rs b/core/src/mem/maybe_uninit.rs index 4ed914386eb87..33a6ea9ca7d13 100644 --- a/core/src/mem/maybe_uninit.rs +++ b/core/src/mem/maybe_uninit.rs @@ -1,4 +1,5 @@ use crate::any::type_name; +use crate::clone::TrivialClone; use crate::marker::Destruct; use crate::mem::ManuallyDrop; use crate::{fmt, intrinsics, ptr, slice}; @@ -356,6 +357,10 @@ impl Clone for MaybeUninit { } } +// SAFETY: the clone implementation is a copy, see above. +#[unstable(feature = "trivial_clone", issue = "none")] +unsafe impl TrivialClone for MaybeUninit where MaybeUninit: Clone {} + #[stable(feature = "maybe_uninit_debug", since = "1.41.0")] impl fmt::Debug for MaybeUninit { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -1599,8 +1604,12 @@ impl SpecFill for [MaybeUninit] { } } -impl SpecFill for [MaybeUninit] { +impl SpecFill for [MaybeUninit] { fn spec_fill(&mut self, value: T) { - self.fill(MaybeUninit::new(value)); + // SAFETY: because `T` is `TrivialClone`, this is equivalent to calling + // `T::clone` for every element. Notably, `TrivialClone` also implies + // that the `clone` implementation will not panic, so we can avoid + // initialization guards and such. 
+ self.fill_with(|| MaybeUninit::new(unsafe { ptr::read(&value) })); } } diff --git a/core/src/mem/mod.rs b/core/src/mem/mod.rs index 619e8a263db40..8332d6cfc0286 100644 --- a/core/src/mem/mod.rs +++ b/core/src/mem/mod.rs @@ -6,6 +6,7 @@ #![stable(feature = "rust1", since = "1.0.0")] use crate::alloc::Layout; +use crate::clone::TrivialClone; use crate::marker::{Destruct, DiscriminantKind}; use crate::panic::const_assert; use crate::{clone, cmp, fmt, hash, intrinsics, ptr}; @@ -1070,6 +1071,9 @@ impl clone::Clone for Discriminant { } } +#[unstable(feature = "trivial_clone", issue = "none")] +unsafe impl TrivialClone for Discriminant {} + #[stable(feature = "discriminant_value", since = "1.21.0")] impl cmp::PartialEq for Discriminant { fn eq(&self, rhs: &Self) -> bool { diff --git a/core/src/num/nonzero.rs b/core/src/num/nonzero.rs index 8cd8b0850e947..9685f521b38c1 100644 --- a/core/src/num/nonzero.rs +++ b/core/src/num/nonzero.rs @@ -1,7 +1,7 @@ //! Definitions of integer that is known not to equal zero. use super::{IntErrorKind, ParseIntError}; -use crate::clone::UseCloned; +use crate::clone::{TrivialClone, UseCloned}; use crate::cmp::Ordering; use crate::hash::{Hash, Hasher}; use crate::marker::{Destruct, Freeze, StructuralPartialEq}; @@ -199,6 +199,9 @@ impl UseCloned for NonZero where T: ZeroablePrimitive {} #[stable(feature = "nonzero", since = "1.28.0")] impl Copy for NonZero where T: ZeroablePrimitive {} +#[unstable(feature = "trivial_clone", issue = "none")] +unsafe impl TrivialClone for NonZero where T: ZeroablePrimitive {} + #[stable(feature = "nonzero", since = "1.28.0")] #[rustc_const_unstable(feature = "const_cmp", issue = "143800")] impl const PartialEq for NonZero diff --git a/core/src/option.rs b/core/src/option.rs index e3c4758bc6af5..3e42f954b8f49 100644 --- a/core/src/option.rs +++ b/core/src/option.rs @@ -581,6 +581,7 @@ #![stable(feature = "rust1", since = "1.0.0")] +use crate::clone::TrivialClone; use crate::iter::{self, FusedIterator, TrustedLen}; use crate::marker::Destruct; use crate::ops::{self, ControlFlow, Deref, DerefMut}; @@ -2215,6 +2216,10 @@ where #[unstable(feature = "ergonomic_clones", issue = "132290")] impl crate::clone::UseCloned for Option where T: crate::clone::UseCloned {} +#[unstable(feature = "trivial_clone", issue = "none")] +#[rustc_const_unstable(feature = "const_clone", issue = "142757")] +unsafe impl const TrivialClone for Option where T: [const] TrivialClone + [const] Destruct {} + #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_unstable(feature = "const_default", issue = "143894")] impl const Default for Option { diff --git a/core/src/ptr/metadata.rs b/core/src/ptr/metadata.rs index dc3ec3fd19945..ece195c2fe4a5 100644 --- a/core/src/ptr/metadata.rs +++ b/core/src/ptr/metadata.rs @@ -1,5 +1,6 @@ #![unstable(feature = "ptr_metadata", issue = "81513")] +use crate::clone::TrivialClone; use crate::fmt; use crate::hash::{Hash, Hasher}; use crate::intrinsics::{aggregate_raw_ptr, ptr_metadata}; @@ -231,6 +232,8 @@ impl Clone for DynMetadata { } } +unsafe impl TrivialClone for DynMetadata {} + impl Eq for DynMetadata {} impl PartialEq for DynMetadata { diff --git a/core/src/ptr/non_null.rs b/core/src/ptr/non_null.rs index a762e969b52dc..8f4538d9c8031 100644 --- a/core/src/ptr/non_null.rs +++ b/core/src/ptr/non_null.rs @@ -1,3 +1,4 @@ +use crate::clone::TrivialClone; use crate::cmp::Ordering; use crate::marker::{Destruct, PointeeSized, Unsize}; use crate::mem::{MaybeUninit, SizedTypeProperties}; @@ -1653,6 +1654,9 @@ impl Clone for 
NonNull { #[stable(feature = "nonnull", since = "1.25.0")] impl Copy for NonNull {} +#[unstable(feature = "trivial_clone", issue = "none")] +unsafe impl TrivialClone for NonNull {} + #[unstable(feature = "coerce_unsized", issue = "18598")] impl CoerceUnsized> for NonNull where T: Unsize {} diff --git a/core/src/ptr/unique.rs b/core/src/ptr/unique.rs index cdc8b6cc936df..85e006b98b8b6 100644 --- a/core/src/ptr/unique.rs +++ b/core/src/ptr/unique.rs @@ -1,3 +1,4 @@ +use crate::clone::TrivialClone; use crate::fmt; use crate::marker::{PhantomData, PointeeSized, Unsize}; use crate::ops::{CoerceUnsized, DispatchFromDyn}; @@ -165,6 +166,9 @@ impl Clone for Unique { #[unstable(feature = "ptr_internals", issue = "none")] impl Copy for Unique {} +#[unstable(feature = "trivial_clone", issue = "none")] +unsafe impl TrivialClone for Unique {} + #[unstable(feature = "ptr_internals", issue = "none")] impl CoerceUnsized> for Unique where T: Unsize {} diff --git a/core/src/slice/mod.rs b/core/src/slice/mod.rs index 1d88eb33dce10..f03f2045444df 100644 --- a/core/src/slice/mod.rs +++ b/core/src/slice/mod.rs @@ -6,6 +6,7 @@ #![stable(feature = "rust1", since = "1.0.0")] +use crate::clone::TrivialClone; use crate::cmp::Ordering::{self, Equal, Greater, Less}; use crate::intrinsics::{exact_div, unchecked_sub}; use crate::mem::{self, MaybeUninit, SizedTypeProperties}; @@ -3890,30 +3891,8 @@ impl [T] { where T: Copy, { - // The panic code path was put into a cold function to not bloat the - // call site. - #[cfg_attr(not(panic = "immediate-abort"), inline(never), cold)] - #[cfg_attr(panic = "immediate-abort", inline)] - #[track_caller] - const fn len_mismatch_fail(dst_len: usize, src_len: usize) -> ! { - const_panic!( - "copy_from_slice: source slice length does not match destination slice length", - "copy_from_slice: source slice length ({src_len}) does not match destination slice length ({dst_len})", - src_len: usize, - dst_len: usize, - ) - } - - if self.len() != src.len() { - len_mismatch_fail(self.len(), src.len()); - } - - // SAFETY: `self` is valid for `self.len()` elements by definition, and `src` was - // checked to have the same length. The slices cannot overlap because - // mutable references are exclusive. - unsafe { - ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), self.len()); - } + // SAFETY: `T` implements `Copy`. + unsafe { copy_from_slice_impl(self, src) } } /// Copies elements from one part of the slice to another part of itself, @@ -5123,6 +5102,38 @@ impl [f64] { } } +/// Copies `src` to `dest`. +/// +/// # Safety +/// `T` must implement one of `Copy` or `TrivialClone`. +#[track_caller] +const unsafe fn copy_from_slice_impl(dest: &mut [T], src: &[T]) { + // The panic code path was put into a cold function to not bloat the + // call site. + #[cfg_attr(not(panic = "immediate-abort"), inline(never), cold)] + #[cfg_attr(panic = "immediate-abort", inline)] + #[track_caller] + const fn len_mismatch_fail(dst_len: usize, src_len: usize) -> ! { + const_panic!( + "copy_from_slice: source slice length does not match destination slice length", + "copy_from_slice: source slice length ({src_len}) does not match destination slice length ({dst_len})", + src_len: usize, + dst_len: usize, + ) + } + + if dest.len() != src.len() { + len_mismatch_fail(dest.len(), src.len()); + } + + // SAFETY: `self` is valid for `self.len()` elements by definition, and `src` was + // checked to have the same length. The slices cannot overlap because + // mutable references are exclusive. 
+ unsafe { + ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr(), dest.len()); + } +} + trait CloneFromSpec { fn spec_clone_from(&mut self, src: &[T]); } @@ -5147,11 +5158,14 @@ where impl CloneFromSpec for [T] where - T: Copy, + T: TrivialClone, { #[track_caller] fn spec_clone_from(&mut self, src: &[T]) { - self.copy_from_slice(src); + // SAFETY: `T` implements `TrivialClone`. + unsafe { + copy_from_slice_impl(self, src); + } } } diff --git a/core/src/slice/specialize.rs b/core/src/slice/specialize.rs index 17436395fee69..c44225b753642 100644 --- a/core/src/slice/specialize.rs +++ b/core/src/slice/specialize.rs @@ -1,3 +1,6 @@ +use crate::clone::TrivialClone; +use crate::ptr; + pub(super) trait SpecFill { fn spec_fill(&mut self, value: T); } @@ -14,10 +17,12 @@ impl SpecFill for [T] { } } -impl SpecFill for [T] { +impl SpecFill for [T] { default fn spec_fill(&mut self, value: T) { for item in self.iter_mut() { - *item = value; + // SAFETY: `TrivialClone` indicates that this is equivalent to + // calling `Clone::clone` + *item = unsafe { ptr::read(&value) }; } } } diff --git a/core/src/sync/exclusive.rs b/core/src/sync/exclusive.rs index f181c5514f256..7c79a191e409e 100644 --- a/core/src/sync/exclusive.rs +++ b/core/src/sync/exclusive.rs @@ -1,5 +1,6 @@ //! Defines [`Exclusive`]. +use core::clone::TrivialClone; use core::cmp::Ordering; use core::fmt; use core::future::Future; @@ -261,6 +262,9 @@ where } } +#[unstable(feature = "trivial_clone", issue = "none")] +unsafe impl TrivialClone for Exclusive where T: Sync + TrivialClone {} + #[unstable(feature = "exclusive_wrapper", issue = "98407")] impl Copy for Exclusive where T: Sync + Copy {} From 657873dd7f93d9fa98672159e8bfd3b11e626293 Mon Sep 17 00:00:00 2001 From: nxsaken Date: Sun, 9 Nov 2025 20:21:12 +0400 Subject: [PATCH 299/358] Constify `ManuallyDrop::take` --- core/src/mem/manually_drop.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/mem/manually_drop.rs b/core/src/mem/manually_drop.rs index 334a4b7119a11..b2328fcf7d844 100644 --- a/core/src/mem/manually_drop.rs +++ b/core/src/mem/manually_drop.rs @@ -217,8 +217,9 @@ impl ManuallyDrop { /// #[must_use = "if you don't need the value, you can use `ManuallyDrop::drop` instead"] #[stable(feature = "manually_drop_take", since = "1.42.0")] + #[rustc_const_unstable(feature = "const_manually_drop_take", issue = "none")] #[inline] - pub unsafe fn take(slot: &mut ManuallyDrop) -> T { + pub const unsafe fn take(slot: &mut ManuallyDrop) -> T { // SAFETY: we are reading from a reference, which is guaranteed // to be valid for reads. unsafe { ptr::read(&slot.value) } From df9d8b8b6b0d1208bb92c55cfc40a4fe298fa657 Mon Sep 17 00:00:00 2001 From: joboet Date: Fri, 17 Jan 2025 16:19:10 +0100 Subject: [PATCH 300/358] add a `TrivialClone` implementation when deriving both `Clone` and `Copy` --- core/src/clone.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/clone.rs b/core/src/clone.rs index 3cf8b51d50e67..4f76abdb77513 100644 --- a/core/src/clone.rs +++ b/core/src/clone.rs @@ -280,7 +280,7 @@ pub const unsafe trait TrivialClone: [const] Clone {} /// Derive macro generating an impl of the trait `Clone`. 
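A sketch of the constified `ManuallyDrop::take` from [PATCH 299/358] above (nightly-only; gate name from the diff):

```
#![feature(const_manually_drop_take)]

use std::mem::ManuallyDrop;

const VALUE: u32 = {
    let mut slot = ManuallyDrop::new(42u32);
    // SAFETY: `slot` is never touched again, so the value is not duplicated.
    unsafe { ManuallyDrop::take(&mut slot) }
};

fn main() {
    assert_eq!(VALUE, 42);
}
```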
#[rustc_builtin_macro] #[stable(feature = "builtin_macro_prelude", since = "1.38.0")] -#[allow_internal_unstable(core_intrinsics, derive_clone_copy)] +#[allow_internal_unstable(core_intrinsics, derive_clone_copy, trivial_clone)] pub macro Clone($item:item) { /* compiler built-in */ } From 3fc578bffbca87d28814ab9ca8df759fb5bb4da8 Mon Sep 17 00:00:00 2001 From: joboet Date: Tue, 11 Feb 2025 11:16:45 +0100 Subject: [PATCH 301/358] alloc: remove test of unsound specialization behaviour --- alloctests/tests/vec.rs | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/alloctests/tests/vec.rs b/alloctests/tests/vec.rs index ea334ab0f143a..03b82fa82440e 100644 --- a/alloctests/tests/vec.rs +++ b/alloctests/tests/vec.rs @@ -2299,20 +2299,6 @@ fn test_vec_swap() { assert_eq!(n, 0); } -#[test] -fn test_extend_from_within_spec() { - #[derive(Copy)] - struct CopyOnly; - - impl Clone for CopyOnly { - fn clone(&self) -> Self { - panic!("extend_from_within must use specialization on copy"); - } - } - - vec![CopyOnly, CopyOnly].extend_from_within(..); -} - #[test] fn test_extend_from_within_clone() { let mut v = vec![String::from("sssss"), String::from("12334567890"), String::from("c")]; From bc70836b7a00911ee5e101acf9173cdf5a71db2b Mon Sep 17 00:00:00 2001 From: joboet Date: Tue, 11 Feb 2025 11:42:22 +0100 Subject: [PATCH 302/358] make `TrivialClone` a `#[marker]`-trait to keep it from appearing in error messages --- core/src/clone.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/src/clone.rs b/core/src/clone.rs index 4f76abdb77513..eeb26043b39b3 100644 --- a/core/src/clone.rs +++ b/core/src/clone.rs @@ -275,6 +275,10 @@ pub const trait Clone: Sized { // its invariant holds whenever `Clone` is implemented, even if the actual // `TrivialClone` bound would not be satisfied because of lifetime bounds. #[rustc_unsafe_specialization_marker] +// If `#[derive(Clone, Clone, Copy)]` is written, there will be multiple +// implementations of `TrivialClone`. To keep it from appearing in error +// messages, make it a `#[marker]` trait. +#[marker] pub const unsafe trait TrivialClone: [const] Clone {} /// Derive macro generating an impl of the trait `Clone`. From 08b0ee4a4b499326fba17ab5c737c76228120271 Mon Sep 17 00:00:00 2001 From: joboet Date: Mon, 17 Mar 2025 13:11:41 +0100 Subject: [PATCH 303/358] automatically implement `TrivialClone` for closures and tuples If each of the component types is `TrivialClone`, the closure/tuple itself can be trivially cloned. --- core/src/clone.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/clone.rs b/core/src/clone.rs index eeb26043b39b3..40ab760f3e702 100644 --- a/core/src/clone.rs +++ b/core/src/clone.rs @@ -269,6 +269,7 @@ pub const trait Clone: Sized { issue = "none" )] #[rustc_const_unstable(feature = "const_clone", issue = "142757")] +#[lang = "trivial_clone"] // SAFETY: // It is sound to specialize on this because the `clone` implementation cannot be // lifetime-dependent. Therefore, if `TrivialClone` is implemented for any lifetime, From bce723b4166cb537732e832e18180c283cc37a61 Mon Sep 17 00:00:00 2001 From: Zachary S Date: Sat, 8 Nov 2025 22:24:41 -0600 Subject: [PATCH 304/358] Optimize Vec::from_elem for some more cases. Implement IsZero for (). Implement default `IsZero` for all arrays, only returning true if the array is empty (making the existing array impl for `IsZero` elements a specialization). Optimize `IsZero::is_zero` for arrays of zero-sized `IsZero` elements. 
--- alloc/src/vec/is_zero.rs | 42 +++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/alloc/src/vec/is_zero.rs b/alloc/src/vec/is_zero.rs index a3ddd6f6e230e..04b50e5762986 100644 --- a/alloc/src/vec/is_zero.rs +++ b/alloc/src/vec/is_zero.rs @@ -1,3 +1,4 @@ +use core::mem::SizedTypeProperties; use core::num::{NonZero, Saturating, Wrapping}; use crate::boxed::Box; @@ -20,6 +21,8 @@ macro_rules! impl_is_zero { }; } +impl_is_zero!((), |_: ()| true); // It is needed to impl for arrays and tuples of (). + impl_is_zero!(i8, |x| x == 0); // It is needed to impl for arrays and tuples of i8. impl_is_zero!(i16, |x| x == 0); impl_is_zero!(i32, |x| x == 0); @@ -43,17 +46,38 @@ impl_is_zero!(f64, |x: f64| x.to_bits() == 0); // `IsZero` cannot be soundly implemented for pointers because of provenance // (see #135338). +unsafe impl IsZero for [T; N] { + #[inline] + default fn is_zero(&self) -> bool { + // If the array is of length zero, + // then it doesn't actually contain any `T`s, + // so `T::clone` doesn't need to be called, + // and we can "zero-initialize" all zero bytes of the array. + N == 0 + } +} + unsafe impl IsZero for [T; N] { #[inline] fn is_zero(&self) -> bool { - // Because this is generated as a runtime check, it's not obvious that - // it's worth doing if the array is really long. The threshold here - // is largely arbitrary, but was picked because as of 2022-07-01 LLVM - // fails to const-fold the check in `vec![[1; 32]; n]` - // See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022 - // Feel free to tweak if you have better evidence. - - N <= 16 && self.iter().all(IsZero::is_zero) + if T::IS_ZST { + // If T is a ZST, then there is at most one possible value of `T`, + // so we only need to check one element for zeroness. + // We can't unconditionally return `true` here, since, e.g. + // `T = [NonTrivialCloneZst; 5]` is a ZST that implements `IsZero` + // due to the generic array impl, but `T::is_zero` returns `false` + // since the length is not 0. + self.get(0).is_none_or(IsZero::is_zero) + } else { + // Because this is generated as a runtime check, it's not obvious that + // it's worth doing if the array is really long. The threshold here + // is largely arbitrary, but was picked because as of 2022-07-01 LLVM + // fails to const-fold the check in `vec![[1; 32]; n]` + // See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022 + // Feel free to tweak if you have better evidence. + + N <= 16 && self.iter().all(IsZero::is_zero) + } } } @@ -61,7 +85,7 @@ unsafe impl IsZero for [T; N] { macro_rules! impl_is_zero_tuples { // Stopper () => { - // No use for implementing for empty tuple because it is ZST. + // We already have an impl for () above. 
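For orientation, the call shape `IsZero` accelerates; the specialization is internal, and the fast path (requesting zeroed memory from the allocator) is an implementation detail:

```
fn main() {
    // Every byte of `[0u32; 4]` is zero, so `vec!` can take the
    // zeroed-allocation fast path instead of cloning 1024 elements.
    let zeroed = vec![[0u32; 4]; 1024];
    // A non-zero element falls back to the generic clone-per-element path.
    let ones = vec![[1u32; 4]; 1024];
    assert_eq!(zeroed.len(), ones.len());
}
```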
}; ($first_arg:ident $(,$rest:ident)*) => { unsafe impl <$first_arg: IsZero, $($rest: IsZero,)*> IsZero for ($first_arg, $($rest,)*){ From 3984d562c832f671054a151709f400b411d16c20 Mon Sep 17 00:00:00 2001 From: nxsaken Date: Sun, 9 Nov 2025 21:52:04 +0400 Subject: [PATCH 305/358] Constify `mem::take` --- core/src/mem/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/mem/mod.rs b/core/src/mem/mod.rs index 619e8a263db40..1a39a9a551603 100644 --- a/core/src/mem/mod.rs +++ b/core/src/mem/mod.rs @@ -807,7 +807,8 @@ pub const fn swap(x: &mut T, y: &mut T) { /// ``` #[inline] #[stable(feature = "mem_take", since = "1.40.0")] -pub fn take(dest: &mut T) -> T { +#[rustc_const_unstable(feature = "const_default", issue = "143894")] +pub const fn take(dest: &mut T) -> T { replace(dest, T::default()) } From 6397a4ebbbd88b89887a8a048e216313b48e9aef Mon Sep 17 00:00:00 2001 From: joboet Date: Mon, 24 Mar 2025 18:47:49 +0100 Subject: [PATCH 306/358] prevent `TrivialClone` implementations from appearing in rustdoc output --- core/src/array/mod.rs | 1 + core/src/clone.rs | 5 +++++ core/src/marker.rs | 1 + core/src/marker/variance.rs | 1 + core/src/mem/maybe_uninit.rs | 1 + core/src/mem/mod.rs | 1 + core/src/num/nonzero.rs | 1 + core/src/option.rs | 1 + core/src/ptr/metadata.rs | 1 + core/src/ptr/non_null.rs | 1 + core/src/ptr/unique.rs | 1 + core/src/sync/exclusive.rs | 1 + 12 files changed, 16 insertions(+) diff --git a/core/src/array/mod.rs b/core/src/array/mod.rs index 68bc7eb6f811e..2dd639d68f0ea 100644 --- a/core/src/array/mod.rs +++ b/core/src/array/mod.rs @@ -452,6 +452,7 @@ impl Clone for [T; N] { } } +#[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] unsafe impl TrivialClone for [T; N] {} diff --git a/core/src/clone.rs b/core/src/clone.rs index 40ab760f3e702..bf8875098edfa 100644 --- a/core/src/clone.rs +++ b/core/src/clone.rs @@ -616,6 +616,7 @@ mod impls { } } + #[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] #[rustc_const_unstable(feature = "const_clone", issue = "142757")] unsafe impl const TrivialClone for $t {} @@ -639,6 +640,7 @@ mod impls { } } + #[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] #[rustc_const_unstable(feature = "const_clone", issue = "142757")] unsafe impl const TrivialClone for ! {} @@ -652,6 +654,7 @@ mod impls { } } + #[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] #[rustc_const_unstable(feature = "const_clone", issue = "142757")] unsafe impl const TrivialClone for *const T {} @@ -665,6 +668,7 @@ mod impls { } } + #[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] #[rustc_const_unstable(feature = "const_clone", issue = "142757")] unsafe impl const TrivialClone for *mut T {} @@ -680,6 +684,7 @@ mod impls { } } + #[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] #[rustc_const_unstable(feature = "const_clone", issue = "142757")] unsafe impl const TrivialClone for &T {} diff --git a/core/src/marker.rs b/core/src/marker.rs index 64a403ce8cdbb..e945cd77a75f7 100644 --- a/core/src/marker.rs +++ b/core/src/marker.rs @@ -858,6 +858,7 @@ impl Clone for PhantomData { } } +#[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] unsafe impl TrivialClone for PhantomData {} diff --git a/core/src/marker/variance.rs b/core/src/marker/variance.rs index 860d28262caed..5fc62a5ad7ac2 100644 --- a/core/src/marker/variance.rs +++ b/core/src/marker/variance.rs @@ -61,6 +61,7 @@ macro_rules! 
phantom_type { impl Copy for $name where T: ?Sized {} + #[doc(hidden)] unsafe impl TrivialClone for $name where T: ?Sized {} impl PartialEq for $name diff --git a/core/src/mem/maybe_uninit.rs b/core/src/mem/maybe_uninit.rs index 33a6ea9ca7d13..3507d1a0a9a8c 100644 --- a/core/src/mem/maybe_uninit.rs +++ b/core/src/mem/maybe_uninit.rs @@ -358,6 +358,7 @@ impl Clone for MaybeUninit { } // SAFETY: the clone implementation is a copy, see above. +#[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] unsafe impl TrivialClone for MaybeUninit where MaybeUninit: Clone {} diff --git a/core/src/mem/mod.rs b/core/src/mem/mod.rs index 8332d6cfc0286..b180e88fd49bc 100644 --- a/core/src/mem/mod.rs +++ b/core/src/mem/mod.rs @@ -1071,6 +1071,7 @@ impl clone::Clone for Discriminant { } } +#[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] unsafe impl TrivialClone for Discriminant {} diff --git a/core/src/num/nonzero.rs b/core/src/num/nonzero.rs index 9685f521b38c1..983e00e46a5f6 100644 --- a/core/src/num/nonzero.rs +++ b/core/src/num/nonzero.rs @@ -199,6 +199,7 @@ impl UseCloned for NonZero where T: ZeroablePrimitive {} #[stable(feature = "nonzero", since = "1.28.0")] impl Copy for NonZero where T: ZeroablePrimitive {} +#[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] unsafe impl TrivialClone for NonZero where T: ZeroablePrimitive {} diff --git a/core/src/option.rs b/core/src/option.rs index 3e42f954b8f49..57098b95f641b 100644 --- a/core/src/option.rs +++ b/core/src/option.rs @@ -2216,6 +2216,7 @@ where #[unstable(feature = "ergonomic_clones", issue = "132290")] impl crate::clone::UseCloned for Option where T: crate::clone::UseCloned {} +#[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] #[rustc_const_unstable(feature = "const_clone", issue = "142757")] unsafe impl const TrivialClone for Option where T: [const] TrivialClone + [const] Destruct {} diff --git a/core/src/ptr/metadata.rs b/core/src/ptr/metadata.rs index ece195c2fe4a5..998a5b031c280 100644 --- a/core/src/ptr/metadata.rs +++ b/core/src/ptr/metadata.rs @@ -232,6 +232,7 @@ impl Clone for DynMetadata { } } +#[doc(hidden)] unsafe impl TrivialClone for DynMetadata {} impl Eq for DynMetadata {} diff --git a/core/src/ptr/non_null.rs b/core/src/ptr/non_null.rs index 8f4538d9c8031..aa3af2f185287 100644 --- a/core/src/ptr/non_null.rs +++ b/core/src/ptr/non_null.rs @@ -1654,6 +1654,7 @@ impl Clone for NonNull { #[stable(feature = "nonnull", since = "1.25.0")] impl Copy for NonNull {} +#[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] unsafe impl TrivialClone for NonNull {} diff --git a/core/src/ptr/unique.rs b/core/src/ptr/unique.rs index 85e006b98b8b6..5e7b1f7038024 100644 --- a/core/src/ptr/unique.rs +++ b/core/src/ptr/unique.rs @@ -166,6 +166,7 @@ impl Clone for Unique { #[unstable(feature = "ptr_internals", issue = "none")] impl Copy for Unique {} +#[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] unsafe impl TrivialClone for Unique {} diff --git a/core/src/sync/exclusive.rs b/core/src/sync/exclusive.rs index 7c79a191e409e..35b8120995187 100644 --- a/core/src/sync/exclusive.rs +++ b/core/src/sync/exclusive.rs @@ -262,6 +262,7 @@ where } } +#[doc(hidden)] #[unstable(feature = "trivial_clone", issue = "none")] unsafe impl TrivialClone for Exclusive where T: Sync + TrivialClone {} From c7a23a87d9a0b947c0ab8015dcd3b25fd32e9b0c Mon Sep 17 00:00:00 2001 From: nxsaken Date: Mon, 10 Nov 2025 09:38:04 +0400 Subject: [PATCH 307/358] Add tracking issue 
number --- core/src/mem/manually_drop.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/mem/manually_drop.rs b/core/src/mem/manually_drop.rs index b2328fcf7d844..7d49da8509577 100644 --- a/core/src/mem/manually_drop.rs +++ b/core/src/mem/manually_drop.rs @@ -217,7 +217,7 @@ impl ManuallyDrop { /// #[must_use = "if you don't need the value, you can use `ManuallyDrop::drop` instead"] #[stable(feature = "manually_drop_take", since = "1.42.0")] - #[rustc_const_unstable(feature = "const_manually_drop_take", issue = "none")] + #[rustc_const_unstable(feature = "const_manually_drop_take", issue = "148773")] #[inline] pub const unsafe fn take(slot: &mut ManuallyDrop) -> T { // SAFETY: we are reading from a reference, which is guaranteed From a81606c4c74010616b881bfed54988bd97ea8e0a Mon Sep 17 00:00:00 2001 From: reddevilmidzy Date: Mon, 10 Nov 2025 14:56:05 +0900 Subject: [PATCH 308/358] Expand pow docs with special-case tests --- core/src/num/f128.rs | 1 + core/src/num/f16.rs | 1 + core/src/num/int_macros.rs | 6 ++++++ core/src/num/uint_macros.rs | 6 ++++++ std/src/num/f128.rs | 1 + std/src/num/f16.rs | 1 + std/src/num/f32.rs | 2 ++ std/src/num/f64.rs | 2 ++ 8 files changed, 20 insertions(+) diff --git a/core/src/num/f128.rs b/core/src/num/f128.rs index e7101537b298f..2cf06b6d6a35a 100644 --- a/core/src/num/f128.rs +++ b/core/src/num/f128.rs @@ -1770,6 +1770,7 @@ impl f128 { /// assert!(abs_difference <= f128::EPSILON); /// /// assert_eq!(f128::powi(f128::NAN, 0), 1.0); + /// assert_eq!(f128::powi(0.0, 0), 1.0); /// # } /// ``` #[inline] diff --git a/core/src/num/f16.rs b/core/src/num/f16.rs index aa8342a22ad58..51f803672e5c6 100644 --- a/core/src/num/f16.rs +++ b/core/src/num/f16.rs @@ -1745,6 +1745,7 @@ impl f16 { /// assert!(abs_difference <= f16::EPSILON); /// /// assert_eq!(f16::powi(f16::NAN, 0), 1.0); + /// assert_eq!(f16::powi(0.0, 0), 1.0); /// # } /// ``` #[inline] diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index c3460a6409069..63eb340b917a7 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -1714,6 +1714,7 @@ macro_rules! int_impl { /// /// ``` #[doc = concat!("assert_eq!(8", stringify!($SelfT), ".checked_pow(2), Some(64));")] + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".checked_pow(0), Some(1));")] #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MAX.checked_pow(2), None);")] /// ``` @@ -1755,6 +1756,7 @@ macro_rules! int_impl { /// /// ``` #[doc = concat!("assert_eq!(8", stringify!($SelfT), ".strict_pow(2), 64);")] + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".strict_pow(0), 1);")] /// ``` /// /// The following panics because of overflow: @@ -2027,6 +2029,7 @@ macro_rules! int_impl { /// /// ``` #[doc = concat!("assert_eq!((-4", stringify!($SelfT), ").saturating_pow(3), -64);")] + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".saturating_pow(0), 1);")] #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MIN.saturating_pow(2), ", stringify!($SelfT), "::MAX);")] #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MIN.saturating_pow(3), ", stringify!($SelfT), "::MIN);")] /// ``` @@ -2371,6 +2374,7 @@ macro_rules! 
int_impl { #[doc = concat!("assert_eq!(3", stringify!($SelfT), ".wrapping_pow(4), 81);")] /// assert_eq!(3i8.wrapping_pow(5), -13); /// assert_eq!(3i8.wrapping_pow(6), -39); + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".wrapping_pow(0), 1);")] /// ``` #[stable(feature = "no_panic_pow", since = "1.34.0")] #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] @@ -2961,6 +2965,7 @@ macro_rules! int_impl { /// /// ``` #[doc = concat!("assert_eq!(3", stringify!($SelfT), ".overflowing_pow(4), (81, false));")] + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".overflowing_pow(0), (1, false));")] /// assert_eq!(3i8.overflowing_pow(5), (-13, true)); /// ``` #[stable(feature = "no_panic_pow", since = "1.34.0")] @@ -3004,6 +3009,7 @@ macro_rules! int_impl { #[doc = concat!("let x: ", stringify!($SelfT), " = 2; // or any other integer type")] /// /// assert_eq!(x.pow(5), 32); + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".pow(0), 1);")] /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index b5b768cf677aa..3fc8428e0431e 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -2057,6 +2057,7 @@ macro_rules! uint_impl { /// /// ``` #[doc = concat!("assert_eq!(2", stringify!($SelfT), ".checked_pow(5), Some(32));")] + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".checked_pow(0), Some(1));")] #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MAX.checked_pow(2), None);")] /// ``` #[stable(feature = "no_panic_pow", since = "1.34.0")] @@ -2097,6 +2098,7 @@ macro_rules! uint_impl { /// /// ``` #[doc = concat!("assert_eq!(2", stringify!($SelfT), ".strict_pow(5), 32);")] + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".strict_pow(0), 1);")] /// ``` /// /// The following panics because of overflow: @@ -2271,6 +2273,7 @@ macro_rules! uint_impl { /// /// ``` #[doc = concat!("assert_eq!(4", stringify!($SelfT), ".saturating_pow(3), 64);")] + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".saturating_pow(0), 1);")] #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MAX.saturating_pow(2), ", stringify!($SelfT), "::MAX);")] /// ``` #[stable(feature = "no_panic_pow", since = "1.34.0")] @@ -2580,6 +2583,7 @@ macro_rules! uint_impl { /// ``` #[doc = concat!("assert_eq!(3", stringify!($SelfT), ".wrapping_pow(5), 243);")] /// assert_eq!(3u8.wrapping_pow(6), 217); + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".wrapping_pow(0), 1);")] /// ``` #[stable(feature = "no_panic_pow", since = "1.34.0")] #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] @@ -3254,6 +3258,7 @@ macro_rules! uint_impl { /// /// ``` #[doc = concat!("assert_eq!(3", stringify!($SelfT), ".overflowing_pow(5), (243, false));")] + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".overflowing_pow(0), (1, false));")] /// assert_eq!(3u8.overflowing_pow(6), (217, true)); /// ``` #[stable(feature = "no_panic_pow", since = "1.34.0")] @@ -3295,6 +3300,7 @@ macro_rules! 
uint_impl { /// /// ``` #[doc = concat!("assert_eq!(2", stringify!($SelfT), ".pow(5), 32);")] + #[doc = concat!("assert_eq!(0_", stringify!($SelfT), ".pow(0), 1);")] /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")] diff --git a/std/src/num/f128.rs b/std/src/num/f128.rs index 40061d089284b..3b787713afa24 100644 --- a/std/src/num/f128.rs +++ b/std/src/num/f128.rs @@ -37,6 +37,7 @@ impl f128 { /// /// assert_eq!(f128::powf(1.0, f128::NAN), 1.0); /// assert_eq!(f128::powf(f128::NAN, 0.0), 1.0); + /// assert_eq!(f128::powf(0.0, 0.0), 1.0); /// # } /// ``` #[inline] diff --git a/std/src/num/f16.rs b/std/src/num/f16.rs index 0d43b60a62fea..4af21c95c9baf 100644 --- a/std/src/num/f16.rs +++ b/std/src/num/f16.rs @@ -37,6 +37,7 @@ impl f16 { /// /// assert_eq!(f16::powf(1.0, f16::NAN), 1.0); /// assert_eq!(f16::powf(f16::NAN, 0.0), 1.0); + /// assert_eq!(f16::powf(0.0, 0.0), 1.0); /// # } /// ``` #[inline] diff --git a/std/src/num/f32.rs b/std/src/num/f32.rs index c9e192201affc..09ced388a3399 100644 --- a/std/src/num/f32.rs +++ b/std/src/num/f32.rs @@ -308,6 +308,7 @@ impl f32 { /// assert!(abs_difference <= 1e-5); /// /// assert_eq!(f32::powi(f32::NAN, 0), 1.0); + /// assert_eq!(f32::powi(0.0, 0), 1.0); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -333,6 +334,7 @@ impl f32 { /// /// assert_eq!(f32::powf(1.0, f32::NAN), 1.0); /// assert_eq!(f32::powf(f32::NAN, 0.0), 1.0); + /// assert_eq!(f32::powf(0.0, 0.0), 1.0); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] diff --git a/std/src/num/f64.rs b/std/src/num/f64.rs index 11874f9280f02..79adf076e4b1a 100644 --- a/std/src/num/f64.rs +++ b/std/src/num/f64.rs @@ -308,6 +308,7 @@ impl f64 { /// assert!(abs_difference <= 1e-14); /// /// assert_eq!(f64::powi(f64::NAN, 0), 1.0); + /// assert_eq!(f64::powi(0.0, 0), 1.0); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -333,6 +334,7 @@ impl f64 { /// /// assert_eq!(f64::powf(1.0, f64::NAN), 1.0); /// assert_eq!(f64::powf(f64::NAN, 0.0), 1.0); + /// assert_eq!(f64::powf(0.0, 0.0), 1.0); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] From b2e2780a26f028b13d9615a49e5a2407dde7badb Mon Sep 17 00:00:00 2001 From: reddevilmidzy Date: Mon, 10 Nov 2025 15:22:08 +0900 Subject: [PATCH 309/358] Fix a typo in the documentation for the strict_shr function --- core/src/num/int_macros.rs | 2 +- core/src/num/uint_macros.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index c3460a6409069..4e47ec1b78edf 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -1497,7 +1497,7 @@ macro_rules! int_impl { } } - /// Strict shift right. Computes `self >> rhs`, panicking `rhs` is + /// Strict shift right. Computes `self >> rhs`, panicking if `rhs` is /// larger than or equal to the number of bits in `self`. /// /// # Panics diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index b5b768cf677aa..e97900d4dc32a 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -1902,7 +1902,7 @@ macro_rules! uint_impl { } } - /// Strict shift right. Computes `self >> rhs`, panicking `rhs` is + /// Strict shift right. 
Computes `self >> rhs`, panicking if `rhs` is /// larger than or equal to the number of bits in `self`. /// /// # Panics From 927ce59c99979f41a7715de69f3cfd94811f1f31 Mon Sep 17 00:00:00 2001 From: Ayush Singh Date: Tue, 12 Aug 2025 21:49:30 +0530 Subject: [PATCH 310/358] uefi: fs: Add file times plumbing - Add FileTimes implementation. Signed-off-by: Ayush Singh --- std/src/sys/fs/uefi.rs | 43 ++++++++++++++--- std/src/sys/pal/uefi/tests.rs | 81 ++++++++++++++++++++++++++++--- std/src/sys/pal/uefi/time.rs | 89 ++++++++++++++++++++++++----------- 3 files changed, 174 insertions(+), 39 deletions(-) diff --git a/std/src/sys/fs/uefi.rs b/std/src/sys/fs/uefi.rs index e4e7274ae8cb3..fc5f159ec188e 100644 --- a/std/src/sys/fs/uefi.rs +++ b/std/src/sys/fs/uefi.rs @@ -18,6 +18,9 @@ pub struct File(!); pub struct FileAttr { attr: u64, size: u64, + accessed: SystemTime, + modified: SystemTime, + created: SystemTime, } pub struct ReadDir(!); @@ -33,7 +36,10 @@ pub struct OpenOptions { } #[derive(Copy, Clone, Debug, Default)] -pub struct FileTimes {} +pub struct FileTimes { + accessed: Option, + modified: Option, +} #[derive(Clone, PartialEq, Eq, Debug)] // Bool indicates if file is readonly @@ -60,15 +66,15 @@ impl FileAttr { } pub fn modified(&self) -> io::Result { - unsupported() + Ok(self.modified) } pub fn accessed(&self) -> io::Result { - unsupported() + Ok(self.accessed) } pub fn created(&self) -> io::Result { - unsupported() + Ok(self.created) } } @@ -92,8 +98,13 @@ impl FilePermissions { } impl FileTimes { - pub fn set_accessed(&mut self, _t: SystemTime) {} - pub fn set_modified(&mut self, _t: SystemTime) {} + pub fn set_accessed(&mut self, t: SystemTime) { + self.accessed = Some(t); + } + + pub fn set_modified(&mut self, t: SystemTime) { + self.modified = Some(t); + } } impl FileType { @@ -394,6 +405,7 @@ mod uefi_fs { use crate::path::Path; use crate::ptr::NonNull; use crate::sys::helpers; + use crate::sys::time::{self, SystemTime}; pub(crate) struct File(NonNull); @@ -541,4 +553,23 @@ mod uefi_fs { Ok(()) } + + /// EDK2 FAT driver uses EFI_UNSPECIFIED_TIMEZONE to represent localtime. So for proper + /// conversion to SystemTime, we use the current time to get the timezone in such cases. + #[expect(dead_code)] + fn uefi_to_systemtime(mut time: r_efi::efi::Time) -> SystemTime { + time.timezone = if time.timezone == r_efi::efi::UNSPECIFIED_TIMEZONE { + time::system_time_internal::now().unwrap().timezone + } else { + time.timezone + }; + SystemTime::from_uefi(time) + } + + /// Convert to UEFI Time with the current timezone. + #[expect(dead_code)] + fn systemtime_to_uefi(time: SystemTime) -> r_efi::efi::Time { + let now = time::system_time_internal::now().unwrap(); + time.to_uefi_loose(now.timezone, now.daylight) + } } diff --git a/std/src/sys/pal/uefi/tests.rs b/std/src/sys/pal/uefi/tests.rs index 56ca999cc7e99..df3344e2df346 100644 --- a/std/src/sys/pal/uefi/tests.rs +++ b/std/src/sys/pal/uefi/tests.rs @@ -8,6 +8,20 @@ use crate::time::Duration; const SECS_IN_MINUTE: u64 = 60; +const MAX_UEFI_TIME: Duration = from_uefi(r_efi::efi::Time { + year: 9999, + month: 12, + day: 31, + hour: 23, + minute: 59, + second: 59, + nanosecond: 999_999_999, + timezone: 1440, + daylight: 0, + pad1: 0, + pad2: 0, +}); + #[test] fn align() { // UEFI ABI specifies that allocation alignment minimum is always 8. 
So this can be @@ -28,6 +42,19 @@ fn align() { } +// UEFI Time cannot implement Eq due to uninitialized pad1 and pad2 +fn uefi_time_cmp(t1: r_efi::efi::Time, t2: r_efi::efi::Time) -> bool { + t1.year == t2.year + && t1.month == t2.month + && t1.day == t2.day + && t1.hour == t2.hour + && t1.minute == t2.minute + && t1.second == t2.second + && t1.nanosecond == t2.nanosecond + && t1.timezone == t2.timezone + && t1.daylight == t2.daylight +} + #[test] fn systemtime_start() { let t = r_efi::efi::Time { @@ -37,14 +64,15 @@ fn systemtime_start() { hour: 0, minute: 0, second: 0, + pad1: 0, nanosecond: 0, timezone: -1440, daylight: 0, pad2: 0, }; assert_eq!(from_uefi(&t), Duration::new(0, 0)); - assert_eq!(t, to_uefi(&from_uefi(&t), -1440, 0).unwrap()); - assert!(to_uefi(&from_uefi(&t), 0, 0).is_none()); + assert!(uefi_time_cmp(t, to_uefi(&from_uefi(&t), -1440, 0).unwrap())); + assert!(to_uefi(&from_uefi(&t), 0, 0).is_err()); } #[test] @@ -63,8 +91,8 @@ fn systemtime_utc_start() { pad2: 0, }; assert_eq!(from_uefi(&t), Duration::new(1440 * SECS_IN_MINUTE, 0)); - assert_eq!(t, to_uefi(&from_uefi(&t), 0, 0).unwrap()); - assert!(to_uefi(&from_uefi(&t), -1440, 0).is_some()); + assert!(uefi_time_cmp(t, to_uefi(&from_uefi(&t), 0, 0).unwrap())); + assert!(to_uefi(&from_uefi(&t), -1440, 0).is_ok()); } #[test] @@ -82,8 +110,49 @@ fn systemtime_end() { daylight: 0, pad2: 0, }; - assert!(to_uefi(&from_uefi(&t), 1440, 0).is_some()); - assert!(to_uefi(&from_uefi(&t), 1439, 0).is_none()); + assert!(to_uefi(&from_uefi(&t), 1440, 0).is_ok()); + assert!(to_uefi(&from_uefi(&t), 1439, 0).is_err()); +} + +#[test] +fn min_time() { + let inp = Duration::from_secs(1440 * SECS_IN_MINUTE); + let new_tz = to_uefi(&inp, 1440, 0).err().unwrap(); + assert_eq!(new_tz, 0); + assert!(to_uefi(&inp, new_tz, 0).is_ok()); + + let inp = Duration::from_secs(1450 * SECS_IN_MINUTE); + let new_tz = to_uefi(&inp, 1440, 0).err().unwrap(); + assert_eq!(new_tz, 10); + assert!(to_uefi(&inp, new_tz, 0).is_ok()); + + let inp = Duration::from_secs(1450 * SECS_IN_MINUTE + 10); + let new_tz = to_uefi(&inp, 1440, 0).err().unwrap(); + assert_eq!(new_tz, 10); + assert!(to_uefi(&inp, new_tz, 0).is_ok()); + + let inp = Duration::from_secs(1430 * SECS_IN_MINUTE); + let new_tz = to_uefi(&inp, 1440, 0).err().unwrap(); + assert_eq!(new_tz, -10); + assert!(to_uefi(&inp, new_tz, 0).is_ok()); +} + +#[test] +fn max_time() { + let inp = MAX_UEFI_TIME.0; + let new_tz = to_uefi(&inp, -1440, 0).err().unwrap(); + assert_eq!(new_tz, 1440); + assert!(to_uefi(&inp, new_tz, 0).is_ok()); + + let inp = MAX_UEFI_TIME.0 - Duration::from_secs(1440 * SECS_IN_MINUTE); + let new_tz = to_uefi(&inp, -1440, 0).err().unwrap(); + assert_eq!(new_tz, 0); + assert!(to_uefi(&inp, new_tz, 0).is_ok()); + + let inp = MAX_UEFI_TIME.0 - Duration::from_secs(1440 * SECS_IN_MINUTE + 10); + let new_tz = to_uefi(&inp, -1440, 0).err().unwrap(); + assert_eq!(new_tz, 0); + assert!(to_uefi(&inp, new_tz, 0).is_ok()); } // UEFI IoSlice and IoSliceMut Tests diff --git a/std/src/sys/pal/uefi/time.rs b/std/src/sys/pal/uefi/time.rs index 861b98da18daf..f9f90a454976a 100644 --- a/std/src/sys/pal/uefi/time.rs +++ b/std/src/sys/pal/uefi/time.rs @@ -1,5 +1,7 @@ use crate::time::Duration; +const SECS_IN_MINUTE: u64 = 60; + #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] pub struct Instant(Duration); @@ -70,13 +72,32 @@ impl SystemTime { Self(system_time_internal::from_uefi(&t)) } - #[expect(dead_code)] - pub(crate) const fn to_uefi(self, timezone: i16, daylight: u8) -> Option<r_efi::efi::Time> { -
system_time_internal::to_uefi(&self.0, timezone, daylight) + pub(crate) const fn to_uefi( + self, + timezone: i16, + daylight: u8, + ) -> Result { + // system_time_internal::to_uefi requires a valid timezone. In case of unspecified timezone, + // we just pass 0 since it is assumed that no timezone related adjustments are required. + if timezone == r_efi::efi::UNSPECIFIED_TIMEZONE { + system_time_internal::to_uefi(&self.0, 0, daylight) + } else { + system_time_internal::to_uefi(&self.0, timezone, daylight) + } + } + + /// Create UEFI Time with the closest timezone (minute offset) that still allows the time to be + /// represented. + pub(crate) fn to_uefi_loose(self, timezone: i16, daylight: u8) -> r_efi::efi::Time { + match self.to_uefi(timezone, daylight) { + Ok(x) => x, + Err(tz) => self.to_uefi(tz, daylight).unwrap(), + } } pub fn now() -> SystemTime { system_time_internal::now() + .map(Self::from_uefi) .unwrap_or_else(|| panic!("time not implemented on this platform")) } @@ -104,12 +125,11 @@ pub(crate) mod system_time_internal { use crate::mem::MaybeUninit; use crate::ptr::NonNull; - const SECS_IN_MINUTE: u64 = 60; const SECS_IN_HOUR: u64 = SECS_IN_MINUTE * 60; const SECS_IN_DAY: u64 = SECS_IN_HOUR * 24; - const TIMEZONE_DELTA: u64 = 1440 * SECS_IN_MINUTE; + const SYSTEMTIME_TIMEZONE: i64 = -1440 * SECS_IN_MINUTE as i64; - pub fn now() -> Option { + pub(crate) fn now() -> Option

(&mut self, mut predicate: P) -> Option where Self: Sized, P: FnMut(Self::Item) -> bool, From dc930ec5e468e9276d9d08e640fb9369cf262388 Mon Sep 17 00:00:00 2001 From: Pavel Grigorenko Date: Tue, 19 Aug 2025 17:46:31 +0300 Subject: [PATCH 342/358] Stabilize `char_max_len` --- alloc/src/lib.rs | 1 - alloc/src/wtf8/mod.rs | 4 ++-- alloctests/lib.rs | 1 - alloctests/tests/lib.rs | 1 - alloctests/tests/str.rs | 5 ++--- core/src/char/methods.rs | 4 ++-- core/src/fmt/mod.rs | 6 +++--- core/src/str/pattern.rs | 3 +-- core/src/wtf8.rs | 4 ++-- coretests/tests/char.rs | 3 +-- std/src/fs/tests.rs | 7 +++---- std/src/lib.rs | 1 - std/src/sys/stdio/windows.rs | 3 +-- 13 files changed, 17 insertions(+), 26 deletions(-) diff --git a/alloc/src/lib.rs b/alloc/src/lib.rs index dc58b30afbce3..80c27d7ec0871 100644 --- a/alloc/src/lib.rs +++ b/alloc/src/lib.rs @@ -99,7 +99,6 @@ #![feature(cast_maybe_uninit)] #![feature(cell_get_cloned)] #![feature(char_internals)] -#![feature(char_max_len)] #![feature(clone_to_uninit)] #![feature(coerce_unsized)] #![feature(const_convert)] diff --git a/alloc/src/wtf8/mod.rs b/alloc/src/wtf8/mod.rs index 047994adc4486..e4834a24bf430 100644 --- a/alloc/src/wtf8/mod.rs +++ b/alloc/src/wtf8/mod.rs @@ -14,7 +14,7 @@ #[cfg(test)] mod tests; -use core::char::{MAX_LEN_UTF8, encode_utf8_raw}; +use core::char::encode_utf8_raw; use core::hash::{Hash, Hasher}; pub use core::wtf8::{CodePoint, Wtf8}; #[cfg(not(test))] @@ -166,7 +166,7 @@ impl Wtf8Buf { /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. /// Copied from String::push. unsafe fn push_code_point_unchecked(&mut self, code_point: CodePoint) { - let mut bytes = [0; MAX_LEN_UTF8]; + let mut bytes = [0; char::MAX_LEN_UTF8]; let bytes = encode_utf8_raw(code_point.to_u32(), &mut bytes); self.bytes.extend_from_slice(bytes) } diff --git a/alloctests/lib.rs b/alloctests/lib.rs index 665bc27047948..73c25679d05ba 100644 --- a/alloctests/lib.rs +++ b/alloctests/lib.rs @@ -19,7 +19,6 @@ #![feature(array_into_iter_constructors)] #![feature(assert_matches)] #![feature(char_internals)] -#![feature(char_max_len)] #![feature(copied_into_inner)] #![feature(core_intrinsics)] #![feature(exact_size_is_empty)] diff --git a/alloctests/tests/lib.rs b/alloctests/tests/lib.rs index 52124495f8c36..b4d3e75b09942 100644 --- a/alloctests/tests/lib.rs +++ b/alloctests/tests/lib.rs @@ -3,7 +3,6 @@ #![feature(iter_array_chunks)] #![feature(assert_matches)] #![feature(wtf8_internals)] -#![feature(char_max_len)] #![feature(cow_is_borrowed)] #![feature(core_intrinsics)] #![feature(deque_extend_front)] diff --git a/alloctests/tests/str.rs b/alloctests/tests/str.rs index 906fa2d425e77..fbb3b01fd67f9 100644 --- a/alloctests/tests/str.rs +++ b/alloctests/tests/str.rs @@ -2,7 +2,6 @@ use std::assert_matches::assert_matches; use std::borrow::Cow; -use std::char::MAX_LEN_UTF8; use std::cmp::Ordering::{Equal, Greater, Less}; use std::str::{from_utf8, from_utf8_unchecked}; @@ -1232,7 +1231,7 @@ fn test_to_uppercase_rev_iterator() { #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn test_chars_decoding() { - let mut bytes = [0; MAX_LEN_UTF8]; + let mut bytes = [0; char::MAX_LEN_UTF8]; for c in (0..0x110000).filter_map(std::char::from_u32) { let s = c.encode_utf8(&mut bytes); if Some(c) != s.chars().next() { @@ -1244,7 +1243,7 @@ fn test_chars_decoding() { #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn test_chars_rev_decoding() { - let mut bytes = [0; MAX_LEN_UTF8]; + let mut bytes = [0; char::MAX_LEN_UTF8]; for c in 
(0..0x110000).filter_map(std::char::from_u32) { let s = c.encode_utf8(&mut bytes); if Some(c) != s.chars().rev().next() { diff --git a/core/src/char/methods.rs b/core/src/char/methods.rs index 76f54db287079..d1de2c5606154 100644 --- a/core/src/char/methods.rs +++ b/core/src/char/methods.rs @@ -74,12 +74,12 @@ impl char { /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to /// UTF-8 encoding. - #[unstable(feature = "char_max_len", issue = "121714")] + #[stable(feature = "char_max_len_assoc", since = "CURRENT_RUSTC_VERSION")] pub const MAX_LEN_UTF8: usize = 4; /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char` /// to UTF-16 encoding. - #[unstable(feature = "char_max_len", issue = "121714")] + #[stable(feature = "char_max_len_assoc", since = "CURRENT_RUSTC_VERSION")] pub const MAX_LEN_UTF16: usize = 2; /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a diff --git a/core/src/fmt/mod.rs b/core/src/fmt/mod.rs index 7ca33d363cd8f..5e7858a1b4936 100644 --- a/core/src/fmt/mod.rs +++ b/core/src/fmt/mod.rs @@ -3,7 +3,7 @@ #![stable(feature = "rust1", since = "1.0.0")] use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell}; -use crate::char::{EscapeDebugExtArgs, MAX_LEN_UTF8}; +use crate::char::EscapeDebugExtArgs; use crate::hint::assert_unchecked; use crate::marker::{PhantomData, PointeeSized}; use crate::num::fmt as numfmt; @@ -181,7 +181,7 @@ pub trait Write { /// ``` #[stable(feature = "fmt_write_char", since = "1.1.0")] fn write_char(&mut self, c: char) -> Result { - self.write_str(c.encode_utf8(&mut [0; MAX_LEN_UTF8])) + self.write_str(c.encode_utf8(&mut [0; char::MAX_LEN_UTF8])) } /// Glue for usage of the [`write!`] macro with implementors of this trait. 
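As a usage sketch for the now-stable constants (the helper below is hypothetical, not part of this patch): a stack buffer of `char::MAX_LEN_UTF8` bytes is guaranteed to hold any UTF-8-encoded `char`.

```rust
// Hypothetical helper: encode a char into a fixed-size stack buffer.
fn utf8_bytes(c: char) -> ([u8; char::MAX_LEN_UTF8], usize) {
    let mut buf = [0u8; char::MAX_LEN_UTF8];
    let len = c.encode_utf8(&mut buf).len();
    (buf, len)
}

fn main() {
    assert_eq!(char::MAX_LEN_UTF8, 4);
    assert_eq!(char::MAX_LEN_UTF16, 2);

    let (buf, len) = utf8_bytes('ß');
    assert_eq!(&buf[..len], "ß".as_bytes());
}
```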
@@ -2983,7 +2983,7 @@ impl Display for char { if f.options.flags & (flags::WIDTH_FLAG | flags::PRECISION_FLAG) == 0 { f.write_char(*self) } else { - f.pad(self.encode_utf8(&mut [0; MAX_LEN_UTF8])) + f.pad(self.encode_utf8(&mut [0; char::MAX_LEN_UTF8])) } } } diff --git a/core/src/str/pattern.rs b/core/src/str/pattern.rs index e116b13838323..b54522fcc886f 100644 --- a/core/src/str/pattern.rs +++ b/core/src/str/pattern.rs @@ -38,7 +38,6 @@ issue = "27721" )] -use crate::char::MAX_LEN_UTF8; use crate::cmp::Ordering; use crate::convert::TryInto as _; use crate::slice::memchr; @@ -563,7 +562,7 @@ impl Pattern for char { #[inline] fn into_searcher<'a>(self, haystack: &'a str) -> Self::Searcher<'a> { - let mut utf8_encoded = [0; MAX_LEN_UTF8]; + let mut utf8_encoded = [0; char::MAX_LEN_UTF8]; let utf8_size = self .encode_utf8(&mut utf8_encoded) .len() diff --git a/core/src/wtf8.rs b/core/src/wtf8.rs index 11cd2b8776f22..b64fccedc19cd 100644 --- a/core/src/wtf8.rs +++ b/core/src/wtf8.rs @@ -19,7 +19,7 @@ // implementations, so, we'll have to add more doc(hidden)s anyway #![doc(hidden)] -use crate::char::{MAX_LEN_UTF16, encode_utf16_raw}; +use crate::char::encode_utf16_raw; use crate::clone::CloneToUninit; use crate::fmt::{self, Write}; use crate::hash::{Hash, Hasher}; @@ -541,7 +541,7 @@ impl Iterator for EncodeWide<'_> { return Some(tmp); } - let mut buf = [0; MAX_LEN_UTF16]; + let mut buf = [0; char::MAX_LEN_UTF16]; self.code_points.next().map(|code_point| { let n = encode_utf16_raw(code_point.to_u32(), &mut buf).len(); if n == 2 { diff --git a/coretests/tests/char.rs b/coretests/tests/char.rs index 6f94065b2d927..f0f6a24429284 100644 --- a/coretests/tests/char.rs +++ b/coretests/tests/char.rs @@ -1,4 +1,3 @@ -use std::char::MAX_LEN_UTF8; use std::str::FromStr; use std::{char, str}; @@ -259,7 +258,7 @@ fn test_escape_unicode() { #[test] fn test_encode_utf8() { fn check(input: char, expect: &[u8]) { - let mut buf = [0; MAX_LEN_UTF8]; + let mut buf = [0; char::MAX_LEN_UTF8]; let ptr = buf.as_ptr(); let s = input.encode_utf8(&mut buf); assert_eq!(s.as_ptr() as usize, ptr as usize); diff --git a/std/src/fs/tests.rs b/std/src/fs/tests.rs index 0517760c35501..9fd87e119906e 100644 --- a/std/src/fs/tests.rs +++ b/std/src/fs/tests.rs @@ -9,7 +9,6 @@ use rand::RngCore; target_vendor = "apple", ))] use crate::assert_matches::assert_matches; -use crate::char::MAX_LEN_UTF8; #[cfg(any( windows, target_os = "freebsd", @@ -174,7 +173,7 @@ fn file_test_io_non_positional_read() { #[test] fn file_test_io_seek_and_tell_smoke_test() { let message = "ten-four"; - let mut read_mem = [0; MAX_LEN_UTF8]; + let mut read_mem = [0; char::MAX_LEN_UTF8]; let set_cursor = 4 as u64; let tell_pos_pre_read; let tell_pos_post_read; @@ -405,7 +404,7 @@ fn file_test_io_seek_shakedown() { let chunk_one: &str = "qwer"; let chunk_two: &str = "asdf"; let chunk_three: &str = "zxcv"; - let mut read_mem = [0; MAX_LEN_UTF8]; + let mut read_mem = [0; char::MAX_LEN_UTF8]; let tmpdir = tmpdir(); let filename = &tmpdir.join("file_rt_io_file_test_seek_shakedown.txt"); { @@ -782,7 +781,7 @@ fn file_test_directoryinfo_readdir() { check!(w.write(msg)); } let files = check!(fs::read_dir(dir)); - let mut mem = [0; MAX_LEN_UTF8]; + let mut mem = [0; char::MAX_LEN_UTF8]; for f in files { let f = f.unwrap().path(); { diff --git a/std/src/lib.rs b/std/src/lib.rs index 5374e9e61783f..c443078b0291f 100644 --- a/std/src/lib.rs +++ b/std/src/lib.rs @@ -276,7 +276,6 @@ #![feature(cfg_sanitizer_cfi)] #![feature(cfg_target_thread_local)] 
#![feature(cfi_encoding)] -#![feature(char_max_len)] #![feature(const_trait_impl)] #![feature(core_float_math)] #![feature(decl_macro)] diff --git a/std/src/sys/stdio/windows.rs b/std/src/sys/stdio/windows.rs index 9b27f76b9dd1a..62ec115d7b0cb 100644 --- a/std/src/sys/stdio/windows.rs +++ b/std/src/sys/stdio/windows.rs @@ -1,6 +1,5 @@ #![unstable(issue = "none", feature = "windows_stdio")] -use core::char::MAX_LEN_UTF8; use core::str::utf8_char_width; use crate::mem::MaybeUninit; @@ -427,7 +426,7 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result { impl IncompleteUtf8 { pub const fn new() -> IncompleteUtf8 { - IncompleteUtf8 { bytes: [0; MAX_LEN_UTF8], len: 0 } + IncompleteUtf8 { bytes: [0; char::MAX_LEN_UTF8], len: 0 } } } From 90350592831f3413503975db571e6aae4b2e1d8a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 29 Oct 2025 17:47:36 +0100 Subject: [PATCH 343/358] const-eval: fix and re-enable pointer fragment support --- core/src/ptr/mod.rs | 38 +------------------------------------- coretests/tests/ptr.rs | 9 ++++----- 2 files changed, 5 insertions(+), 42 deletions(-) diff --git a/core/src/ptr/mod.rs b/core/src/ptr/mod.rs index fd067d19fcd98..ea0514f405f1e 100644 --- a/core/src/ptr/mod.rs +++ b/core/src/ptr/mod.rs @@ -1352,40 +1352,6 @@ pub const unsafe fn swap(x: *mut T, y: *mut T) { /// assert_eq!(x, [7, 8, 3, 4]); /// assert_eq!(y, [1, 2, 9]); /// ``` -/// -/// # Const evaluation limitations -/// -/// If this function is invoked during const-evaluation, the current implementation has a small (and -/// rarely relevant) limitation: if `count` is at least 2 and the data pointed to by `x` or `y` -/// contains a pointer that crosses the boundary of two `T`-sized chunks of memory, the function may -/// fail to evaluate (similar to a panic during const-evaluation). This behavior may change in the -/// future. -/// -/// The limitation is illustrated by the following example: -/// -/// ``` -/// use std::mem::size_of; -/// use std::ptr; -/// -/// const { unsafe { -/// const PTR_SIZE: usize = size_of::<*const i32>(); -/// let mut data1 = [0u8; PTR_SIZE]; -/// let mut data2 = [0u8; PTR_SIZE]; -/// // Store a pointer in `data1`. -/// data1.as_mut_ptr().cast::<*const i32>().write_unaligned(&42); -/// // Swap the contents of `data1` and `data2` by swapping `PTR_SIZE` many `u8`-sized chunks. -/// // This call will fail, because the pointer in `data1` crosses the boundary -/// // between several of the 1-byte chunks that are being swapped here. -/// //ptr::swap_nonoverlapping(data1.as_mut_ptr(), data2.as_mut_ptr(), PTR_SIZE); -/// // Swap the contents of `data1` and `data2` by swapping a single chunk of size -/// // `[u8; PTR_SIZE]`. That works, as there is no pointer crossing the boundary between -/// // two chunks. -/// ptr::swap_nonoverlapping(&mut data1, &mut data2, 1); -/// // Read the pointer from `data2` and dereference it. 
-/// let ptr = data2.as_ptr().cast::<*const i32>().read_unaligned(); -/// assert!(*ptr == 42); -/// } } -/// ``` #[inline] #[stable(feature = "swap_nonoverlapping", since = "1.27.0")] #[rustc_const_stable(feature = "const_swap_nonoverlapping", since = "1.88.0")] @@ -1414,9 +1380,7 @@ pub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize) { const_eval_select!( @capture[T] { x: *mut T, y: *mut T, count: usize }: if const { - // At compile-time we want to always copy this in chunks of `T`, to ensure that if there - // are pointers inside `T` we will copy them in one go rather than trying to copy a part - // of a pointer (which would not work). + // At compile-time we don't need all the special code below. // SAFETY: Same preconditions as this function unsafe { swap_nonoverlapping_const(x, y, count) } } else { diff --git a/coretests/tests/ptr.rs b/coretests/tests/ptr.rs index 555a3b01f1fcb..93f9454d71378 100644 --- a/coretests/tests/ptr.rs +++ b/coretests/tests/ptr.rs @@ -945,13 +945,12 @@ fn test_const_swap_ptr() { assert!(*s1.0.ptr == 666); assert!(*s2.0.ptr == 1); - // Swap them back, again as an array. - // FIXME(#146291): we should be swapping back at type `u8` but that currently does not work. + // Swap them back, byte-for-byte unsafe { ptr::swap_nonoverlapping( - ptr::from_mut(&mut s1).cast::(), - ptr::from_mut(&mut s2).cast::(), - 1, + ptr::from_mut(&mut s1).cast::(), + ptr::from_mut(&mut s2).cast::(), + size_of::(), ); } From e927a33e1515448820debfa05d9109e8eeeb144a Mon Sep 17 00:00:00 2001 From: Alisa Sireneva Date: Sat, 15 Nov 2025 12:27:45 +0300 Subject: [PATCH 344/358] Document Error::{new,other} as to be avoided in pre_exec --- std/src/os/unix/process.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/std/src/os/unix/process.rs b/std/src/os/unix/process.rs index 5b7b5a8ea803d..ee0c460f7dfa7 100644 --- a/std/src/os/unix/process.rs +++ b/std/src/os/unix/process.rs @@ -80,6 +80,9 @@ pub trait CommandExt: Sealed { /// or acquiring a mutex are not guaranteed to work (due to /// other threads perhaps still running when the `fork` was run). /// + /// Note that the list of allocating functions includes [`Error::new`] and + /// [`Error::other`]. To signal a non-trivial error, prefer [`panic!`]. + /// /// For further details refer to the [POSIX fork() specification] /// and the equivalent documentation for any targeted /// platform, especially the requirements around *async-signal-safety*. 
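A minimal sketch of this guidance (assumes the external `libc` crate for the raw `setsid` call; `io::Error::last_os_error` only reads `errno` and does not allocate, unlike `Error::new`/`Error::other`):

```rust
use std::io;
use std::os::unix::process::CommandExt;
use std::process::Command;

fn main() -> io::Result<()> {
    let mut cmd = Command::new("true");
    // SAFETY: the closure only performs async-signal-safe work.
    unsafe {
        cmd.pre_exec(|| {
            // Runs between fork() and exec(), so avoid anything that
            // allocates. `last_os_error` just captures `errno`.
            if unsafe { libc::setsid() } == -1 {
                return Err(io::Error::last_os_error());
            }
            Ok(())
        });
    }
    cmd.status()?;
    Ok(())
}
```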
@@ -102,6 +105,8 @@ pub trait CommandExt: Sealed { /// [POSIX fork() specification]: /// https://pubs.opengroup.org/onlinepubs/9699919799/functions/fork.html /// [`std::env`]: mod@crate::env + /// [`Error::new`]: crate::io::Error::new + /// [`Error::other`]: crate::io::Error::other #[stable(feature = "process_pre_exec", since = "1.34.0")] unsafe fn pre_exec(&mut self, f: F) -> &mut process::Command where From 5b67b0924c4f9990a14312590259a593aeb6f76e Mon Sep 17 00:00:00 2001 From: Peter Jaszkowiak Date: Sat, 3 May 2025 23:03:47 -0600 Subject: [PATCH 345/358] fix a couple unstable attributes --- core/src/range/iter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/range/iter.rs b/core/src/range/iter.rs index 24efd4a204a5f..8d301f4799b8f 100644 --- a/core/src/range/iter.rs +++ b/core/src/range/iter.rs @@ -168,7 +168,7 @@ impl IterRangeInclusive { } } -#[unstable(feature = "trusted_random_access", issue = "none")] +#[unstable(feature = "new_range_api", issue = "125687")] impl Iterator for IterRangeInclusive { type Item = A; @@ -302,7 +302,7 @@ impl IterRangeFrom { } } -#[unstable(feature = "trusted_random_access", issue = "none")] +#[unstable(feature = "new_range_api", issue = "125687")] impl Iterator for IterRangeFrom { type Item = A; From f7e9a77a8fc5feb3e30523e5ba12ecf865b8bba1 Mon Sep 17 00:00:00 2001 From: Peter Jaszkowiak Date: Sat, 3 May 2025 23:32:17 -0600 Subject: [PATCH 346/358] IterRangeFrom: overflow panic after yielding MAX check overflow after yielding MAX value new `0_u8..` will yield `255` and only panic on the subsequent `next()` --- core/src/range/iter.rs | 57 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/core/src/range/iter.rs b/core/src/range/iter.rs index 8d301f4799b8f..9a8824baefe4e 100644 --- a/core/src/range/iter.rs +++ b/core/src/range/iter.rs @@ -3,6 +3,7 @@ use crate::iter::{ }; use crate::num::NonZero; use crate::range::{Range, RangeFrom, RangeInclusive, legacy}; +use crate::{intrinsics, mem}; /// By-value [`Range`] iterator. #[unstable(feature = "new_range_api", issue = "125687")] @@ -293,12 +294,25 @@ range_incl_exact_iter_impl! { /// By-value [`RangeFrom`] iterator. #[unstable(feature = "new_range_api", issue = "125687")] #[derive(Debug, Clone)] -pub struct IterRangeFrom(legacy::RangeFrom); +pub struct IterRangeFrom { + start: A, + /// Whether the first element of the iterator has yielded. + /// Only used when overflow checks are enabled. + first: bool, +} -impl IterRangeFrom { +impl IterRangeFrom { /// Returns the remainder of the range being iterated over. 
+ #[inline] + #[rustc_inherit_overflow_checks] pub fn remainder(self) -> RangeFrom { - RangeFrom { start: self.0.start } + if intrinsics::overflow_checks() { + if !self.first { + return RangeFrom { start: Step::forward(self.start, 1) }; + } + } + + RangeFrom { start: self.start } } } @@ -307,18 +321,47 @@ impl Iterator for IterRangeFrom { type Item = A; #[inline] + #[rustc_inherit_overflow_checks] fn next(&mut self) -> Option { - self.0.next() + if intrinsics::overflow_checks() { + if self.first { + self.first = false; + return Some(self.start.clone()); + } + + self.start = Step::forward(self.start.clone(), 1); + return Some(self.start.clone()); + } + + let n = Step::forward(self.start.clone(), 1); + Some(mem::replace(&mut self.start, n)) } #[inline] fn size_hint(&self) -> (usize, Option) { - self.0.size_hint() + (usize::MAX, None) } #[inline] + #[rustc_inherit_overflow_checks] fn nth(&mut self, n: usize) -> Option { - self.0.nth(n) + if intrinsics::overflow_checks() { + if self.first { + self.first = false; + + let plus_n = Step::forward(self.start.clone(), n); + self.start = plus_n.clone(); + return Some(plus_n); + } + + let plus_n = Step::forward(self.start.clone(), n); + self.start = Step::forward(plus_n.clone(), 1); + return Some(self.start.clone()); + } + + let plus_n = Step::forward(self.start.clone(), n); + self.start = Step::forward(plus_n.clone(), 1); + Some(plus_n) } } @@ -334,6 +377,6 @@ impl IntoIterator for RangeFrom { type IntoIter = IterRangeFrom; fn into_iter(self) -> Self::IntoIter { - IterRangeFrom(self.into()) + IterRangeFrom { start: self.start, first: true } } } From 3331108f709446576b99475f86862245c193b837 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 16 Nov 2025 15:16:09 +0100 Subject: [PATCH 347/358] add must_use to extract_if methods --- alloc/src/collections/btree/map.rs | 6 ++++-- alloc/src/collections/btree/set.rs | 6 ++++-- alloc/src/collections/linked_list.rs | 3 ++- alloc/src/collections/vec_deque/extract_if.rs | 3 ++- alloc/src/collections/vec_deque/mod.rs | 3 ++- alloc/src/vec/extract_if.rs | 3 ++- alloc/src/vec/mod.rs | 3 ++- std/src/collections/hash/map.rs | 3 ++- std/src/collections/hash/set.rs | 2 ++ 9 files changed, 22 insertions(+), 10 deletions(-) diff --git a/alloc/src/collections/btree/map.rs b/alloc/src/collections/btree/map.rs index ca5b46c9b0fd0..766f4589177a8 100644 --- a/alloc/src/collections/btree/map.rs +++ b/alloc/src/collections/btree/map.rs @@ -1434,7 +1434,8 @@ impl BTreeMap { /// /// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating /// or the iteration short-circuits, then the remaining elements will be retained. - /// Use [`retain`] with a negated predicate if you do not need the returned iterator. + /// Use `extract_if().for_each(drop)` if you do not need the returned iterator, + /// or [`retain`] with a negated predicate if you also do not need to restrict the range. /// /// [`retain`]: BTreeMap::retain /// @@ -1945,7 +1946,8 @@ impl Default for Values<'_, K, V> { /// An iterator produced by calling `extract_if` on BTreeMap. 
#[stable(feature = "btree_extract_if", since = "1.91.0")] -#[must_use = "iterators are lazy and do nothing unless consumed"] +#[must_use = "iterators are lazy and do nothing unless consumed; \ + use `retain` or `extract_if().for_each(drop)` to remove and discard elements"] pub struct ExtractIf< 'a, K, diff --git a/alloc/src/collections/btree/set.rs b/alloc/src/collections/btree/set.rs index cb3e14252f8a3..28d26699d7d2c 100644 --- a/alloc/src/collections/btree/set.rs +++ b/alloc/src/collections/btree/set.rs @@ -1189,7 +1189,8 @@ impl BTreeSet { /// /// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating /// or the iteration short-circuits, then the remaining elements will be retained. - /// Use [`retain`] with a negated predicate if you do not need the returned iterator. + /// Use `extract_if().for_each(drop)` if you do not need the returned iterator, + /// or [`retain`] with a negated predicate if you also do not need to restrict the range. /// /// [`retain`]: BTreeSet::retain /// # Examples @@ -1547,7 +1548,8 @@ impl<'a, T, A: Allocator + Clone> IntoIterator for &'a BTreeSet { /// An iterator produced by calling `extract_if` on BTreeSet. #[stable(feature = "btree_extract_if", since = "1.91.0")] -#[must_use = "iterators are lazy and do nothing unless consumed"] +#[must_use = "iterators are lazy and do nothing unless consumed; \ + use `retain` or `extract_if().for_each(drop)` to remove and discard elements"] pub struct ExtractIf< 'a, T, diff --git a/alloc/src/collections/linked_list.rs b/alloc/src/collections/linked_list.rs index 31dfe73fc7992..8bc0e08a4b26b 100644 --- a/alloc/src/collections/linked_list.rs +++ b/alloc/src/collections/linked_list.rs @@ -1943,7 +1943,8 @@ impl<'a, T, A: Allocator> CursorMut<'a, T, A> { /// An iterator produced by calling `extract_if` on LinkedList. #[stable(feature = "extract_if", since = "1.87.0")] -#[must_use = "iterators are lazy and do nothing unless consumed"] +#[must_use = "iterators are lazy and do nothing unless consumed; \ + use `extract_if().for_each(drop)` to remove and discard elements"] pub struct ExtractIf< 'a, T: 'a, diff --git a/alloc/src/collections/vec_deque/extract_if.rs b/alloc/src/collections/vec_deque/extract_if.rs index bed7d46482cf4..437f0d6dd5eb3 100644 --- a/alloc/src/collections/vec_deque/extract_if.rs +++ b/alloc/src/collections/vec_deque/extract_if.rs @@ -21,7 +21,8 @@ use crate::alloc::{Allocator, Global}; /// let iter: ExtractIf<'_, _, _> = v.extract_if(.., |x| *x % 2 == 0); /// ``` #[unstable(feature = "vec_deque_extract_if", issue = "147750")] -#[must_use = "iterators are lazy and do nothing unless consumed"] +#[must_use = "iterators are lazy and do nothing unless consumed; \ + use `retain_mut` or `extract_if().for_each(drop)` to remove and discard elements"] pub struct ExtractIf< 'a, T, diff --git a/alloc/src/collections/vec_deque/mod.rs b/alloc/src/collections/vec_deque/mod.rs index 78930364a9260..52e079d3ae8e6 100644 --- a/alloc/src/collections/vec_deque/mod.rs +++ b/alloc/src/collections/vec_deque/mod.rs @@ -676,7 +676,8 @@ impl VecDeque { /// /// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating /// or the iteration short-circuits, then the remaining elements will be retained. - /// Use [`retain_mut`] with a negated predicate if you do not need the returned iterator. + /// Use `extract_if().for_each(drop)` if you do not need the returned iterator, + /// or [`retain_mut`] with a negated predicate if you also do not need to restrict the range. 
/// /// [`retain_mut`]: VecDeque::retain_mut /// diff --git a/alloc/src/vec/extract_if.rs b/alloc/src/vec/extract_if.rs index cb9e14f554d41..014219f8d461c 100644 --- a/alloc/src/vec/extract_if.rs +++ b/alloc/src/vec/extract_if.rs @@ -16,7 +16,8 @@ use crate::alloc::{Allocator, Global}; /// let iter: std::vec::ExtractIf<'_, _, _> = v.extract_if(.., |x| *x % 2 == 0); /// ``` #[stable(feature = "extract_if", since = "1.87.0")] -#[must_use = "iterators are lazy and do nothing unless consumed"] +#[must_use = "iterators are lazy and do nothing unless consumed; \ + use `retain_mut` or `extract_if().for_each(drop)` to remove and discard elements"] pub struct ExtractIf< 'a, T, diff --git a/alloc/src/vec/mod.rs b/alloc/src/vec/mod.rs index 14272c76ed54b..13d38d3c9609a 100644 --- a/alloc/src/vec/mod.rs +++ b/alloc/src/vec/mod.rs @@ -3933,7 +3933,8 @@ impl Vec { /// /// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating /// or the iteration short-circuits, then the remaining elements will be retained. - /// Use [`retain_mut`] with a negated predicate if you do not need the returned iterator. + /// Use `extract_if().for_each(drop)` if you do not need the returned iterator, + /// or [`retain_mut`] with a negated predicate if you also do not need to restrict the range. /// /// [`retain_mut`]: Vec::retain_mut /// diff --git a/std/src/collections/hash/map.rs b/std/src/collections/hash/map.rs index fc0fef620e3b6..ab21e3b927e20 100644 --- a/std/src/collections/hash/map.rs +++ b/std/src/collections/hash/map.rs @@ -1685,7 +1685,8 @@ impl<'a, K, V> Drain<'a, K, V> { /// let iter = map.extract_if(|_k, v| *v % 2 == 0); /// ``` #[stable(feature = "hash_extract_if", since = "1.88.0")] -#[must_use = "iterators are lazy and do nothing unless consumed"] +#[must_use = "iterators are lazy and do nothing unless consumed; \ + use `retain` to remove and discard elements"] pub struct ExtractIf<'a, K, V, F> { base: base::ExtractIf<'a, K, V, F>, } diff --git a/std/src/collections/hash/set.rs b/std/src/collections/hash/set.rs index 482d57b47f677..6795da80aacb6 100644 --- a/std/src/collections/hash/set.rs +++ b/std/src/collections/hash/set.rs @@ -1391,6 +1391,8 @@ pub struct Drain<'a, K: 'a> { /// let mut extract_ifed = a.extract_if(|v| v % 2 == 0); /// ``` #[stable(feature = "hash_extract_if", since = "1.88.0")] +#[must_use = "iterators are lazy and do nothing unless consumed; \ + use `retain` to remove and discard elements"] pub struct ExtractIf<'a, K, F> { base: base::ExtractIf<'a, K, F>, } From 00c8f1d181d734bda11952788936e49b96d70c13 Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Tue, 11 Nov 2025 21:47:41 -0500 Subject: [PATCH 348/358] add autodiff examples --- core/Cargo.toml | 3 ++ core/src/macros/mod.rs | 68 ++++++++++++++++++++++++++++++++++++++---- std/Cargo.toml | 1 + sysroot/Cargo.toml | 1 + 4 files changed, 67 insertions(+), 6 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index d094172b07659..8f435dd72d7a1 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -23,6 +23,7 @@ optimize_for_size = [] # Make `RefCell` store additional debugging information, which is printed out when # a borrow error occurs debug_refcell = [] +llvm_enzyme = [] [lints.rust.unexpected_cfgs] level = "warn" @@ -38,4 +39,6 @@ check-cfg = [ 'cfg(target_has_reliable_f16_math)', 'cfg(target_has_reliable_f128)', 'cfg(target_has_reliable_f128_math)', + 'cfg(llvm_enzyme)', + ] diff --git a/core/src/macros/mod.rs b/core/src/macros/mod.rs index 7d7c4147983ce..4dabf4a561ec3 100644 --- 
a/core/src/macros/mod.rs +++ b/core/src/macros/mod.rs @@ -1511,13 +1511,43 @@ pub(crate) mod builtin { /// If used on an input argument, a new shadow argument of the same type will be created, /// directly following the original argument. /// + /// ### Usage examples: + /// + /// ```rust,ignore (autodiff requires a -Z flag as well as fat-lto for testing) + /// #![feature(autodiff)] + /// use std::autodiff::*; + /// #[autodiff_forward(rb_fwd1, Dual, Const, Dual)] + /// #[autodiff_forward(rb_fwd2, Const, Dual, Dual)] + /// #[autodiff_forward(rb_fwd3, Dual, Dual, Dual)] + /// fn rosenbrock(x: f64, y: f64) -> f64 { + /// (1.0 - x).powi(2) + 100.0 * (y - x.powi(2)).powi(2) + /// } + /// #[autodiff_forward(rb_inp_fwd, Dual, Dual, Dual)] + /// fn rosenbrock_inp(x: f64, y: f64, out: &mut f64) { + /// *out = (1.0 - x).powi(2) + 100.0 * (y - x.powi(2)).powi(2); + /// } + /// + /// fn main() { + /// let x0 = rosenbrock(1.0, 3.0); // 400.0 + /// let (x1, dx1) = rb_fwd1(1.0, 1.0, 3.0); // (400.0, -800.0) + /// let (x2, dy1) = rb_fwd2(1.0, 3.0, 1.0); // (400.0, 400.0) + /// // When seeding both arguments at once the tangent return is the sum of both. + /// let (x3, dxy) = rb_fwd3(1.0, 1.0, 3.0, 1.0); // (400.0, -400.0) + /// + /// let mut out = 0.0; + /// let mut dout = 0.0; + /// rb_inp_fwd(1.0, 1.0, 3.0, 1.0, &mut out, &mut dout); + /// // (out, dout) == (400.0, -400.0) + /// } + /// ``` + /// /// We might want to track how one input float affects one or more output floats. In this case, /// the shadow of one input should be initialized to `1.0`, while the shadows of the other /// inputs should be initialized to `0.0`. The shadow of the output(s) should be initialized to /// `0.0`. After calling the generated function, the shadow of the input will be zeroed, /// while the shadow(s) of the output(s) will contain the derivatives. Forward mode is generally /// more efficient if we have more output floats marked as `Dual` than input floats. - /// Related information can also be found unter the term "Vector-Jacobian product" (VJP). + /// Related information can also be found under the term "Jacobian-vector product" (JVP). #[unstable(feature = "autodiff", issue = "124509")] #[allow_internal_unstable(rustc_attrs)] #[allow_internal_unstable(core_intrinsics)] @@ -1552,19 +1582,45 @@ pub(crate) mod builtin { /// `Const` should be used on non-float arguments, or float-based arguments as an optimization /// if we are not interested in computing the derivatives with respect to this argument. /// + /// ### Usage examples: + /// + /// ```rust,ignore (autodiff requires a -Z flag as well as fat-lto for testing) + /// #![feature(autodiff)] + /// use std::autodiff::*; + /// #[autodiff_reverse(rb_rev, Active, Active, Active)] + /// fn rosenbrock(x: f64, y: f64) -> f64 { + /// (1.0 - x).powi(2) + 100.0 * (y - x.powi(2)).powi(2) + /// } + /// #[autodiff_reverse(rb_inp_rev, Active, Active, Duplicated)] + /// fn rosenbrock_inp(x: f64, y: f64, out: &mut f64) { + /// *out = (1.0 - x).powi(2) + 100.0 * (y - x.powi(2)).powi(2); + /// } + /// + /// fn main() { + /// let (output1, dx1, dy1) = rb_rev(1.0, 3.0, 1.0); + /// dbg!(output1, dx1, dy1); // (400.0, -800.0, 400.0) + /// let mut output2 = 0.0; + /// let mut seed = 1.0; + /// let (dx2, dy2) = rb_inp_rev(1.0, 3.0, &mut output2, &mut seed); + /// // (dx2, dy2, output2, seed) == (-800.0, 400.0, 400.0, 0.0) + /// } + /// ``` + /// + /// /// We often want to track how one or more input floats affect one output float.
This output can - /// be a scalar return value, or a mutable reference or pointer argument. In this case, the - /// shadow of the input should be marked as duplicated and initialized to `0.0`. The shadow of + /// be a scalar return value, or a mutable reference or pointer argument. In the latter case, the + /// mutable input should be marked as duplicated and its shadow initialized to `0.0`. The shadow of /// the output should be marked as active or duplicated and initialized to `1.0`. After calling - /// the generated function, the shadow(s) of the input(s) will contain the derivatives. If the - /// function has more than one output float marked as active or duplicated, users might want to + /// the generated function, the shadow(s) of the input(s) will contain the derivatives. The + /// shadow of the outputs ("seed") will be reset to zero. + /// If the function has more than one output float marked as active or duplicated, users might want to /// set one of them to `1.0` and the others to `0.0` to compute partial derivatives. /// Unlike forward-mode, a call to the generated function does not reset the shadow of the /// inputs. /// Reverse mode is generally more efficient if we have more active/duplicated input than /// output floats. /// - /// Related information can also be found unter the term "Jacobian-Vector Product" (JVP). + /// Related information can also be found under the term "Vector-Jacobian Product" (VJP). #[unstable(feature = "autodiff", issue = "124509")] #[allow_internal_unstable(rustc_attrs)] #[allow_internal_unstable(core_intrinsics)] diff --git a/std/Cargo.toml b/std/Cargo.toml index 685c2cf162abd..1ba9f7e32d91a 100644 --- a/std/Cargo.toml +++ b/std/Cargo.toml @@ -126,6 +126,7 @@ optimize_for_size = ["core/optimize_for_size", "alloc/optimize_for_size"] # a borrow error occurs debug_refcell = ["core/debug_refcell"] +llvm_enzyme = ["core/llvm_enzyme"] # Enable std_detect features: std_detect_file_io = ["std_detect/std_detect_file_io"] diff --git a/sysroot/Cargo.toml b/sysroot/Cargo.toml index ee4aec61872e7..eec8c461b6db4 100644 --- a/sysroot/Cargo.toml +++ b/sysroot/Cargo.toml @@ -35,3 +35,4 @@ profiler = ["dep:profiler_builtins"] std_detect_file_io = ["std/std_detect_file_io"] std_detect_dlsym_getauxval = ["std/std_detect_dlsym_getauxval"] windows_raw_dylib = ["std/windows_raw_dylib"] +llvm_enzyme = ["std/llvm_enzyme"] From 6573df8b2fc9336d99dee4c1e2fd5629a835f40e Mon Sep 17 00:00:00 2001 From: joboet Date: Fri, 26 Sep 2025 21:46:48 +0200 Subject: [PATCH 349/358] std: move `kernel_copy` to `sys` --- std/src/io/copy.rs | 15 ++++---- std/src/sys/fs/mod.rs | 2 +- std/src/sys/fs/unix.rs | 2 +- .../kernel_copy/linux.rs} | 35 ++++++++++--------- .../kernel_copy/linux}/tests.rs | 0 std/src/sys/io/kernel_copy/mod.rs | 23 ++++++++++++ std/src/sys/io/mod.rs | 3 ++ std/src/sys/pal/unix/mod.rs | 2 -- 8 files changed, 54 insertions(+), 28 deletions(-) rename std/src/sys/{pal/unix/kernel_copy.rs => io/kernel_copy/linux.rs} (97%) rename std/src/sys/{pal/unix/kernel_copy => io/kernel_copy/linux}/tests.rs (100%) create mode 100644 std/src/sys/io/kernel_copy/mod.rs diff --git a/std/src/io/copy.rs b/std/src/io/copy.rs index d060ad528973f..2b558efb8885e 100644 --- a/std/src/io/copy.rs +++ b/std/src/io/copy.rs @@ -4,6 +4,7 @@ use crate::cmp; use crate::collections::VecDeque; use crate::io::IoSlice; use crate::mem::MaybeUninit; +use crate::sys::io::{CopyState, kernel_copy}; #[cfg(test)] mod tests; @@ -63,19 +64,17 @@ where R: Read, W: Write, { - cfg_select!
{ - any(target_os = "linux", target_os = "android") => { - crate::sys::kernel_copy::copy_spec(reader, writer) - } - _ => { - generic_copy(reader, writer) + match kernel_copy(reader, writer)? { + CopyState::Ended(copied) => Ok(copied), + CopyState::Fallback(copied) => { + generic_copy(reader, writer).map(|additional| copied + additional) } } } /// The userspace read-write-loop implementation of `io::copy` that is used when /// OS-specific specializations for copy offloading are not available or not applicable. -pub(crate) fn generic_copy(reader: &mut R, writer: &mut W) -> Result +fn generic_copy(reader: &mut R, writer: &mut W) -> Result where R: Read, W: Write, @@ -269,7 +268,7 @@ impl BufferedWriterSpec for Vec { } } -pub fn stack_buffer_copy( +fn stack_buffer_copy( reader: &mut R, writer: &mut W, ) -> Result { diff --git a/std/src/sys/fs/mod.rs b/std/src/sys/fs/mod.rs index bc1052b6f8c55..eaea28871241a 100644 --- a/std/src/sys/fs/mod.rs +++ b/std/src/sys/fs/mod.rs @@ -14,7 +14,7 @@ cfg_select! { pub use unix::chroot; pub(crate) use unix::debug_assert_fd_is_open; #[cfg(any(target_os = "linux", target_os = "android"))] - pub(crate) use unix::CachedFileMetadata; + pub(super) use unix::CachedFileMetadata; use crate::sys::common::small_c_string::run_path_with_cstr as with_native_path; } target_os = "windows" => { diff --git a/std/src/sys/fs/unix.rs b/std/src/sys/fs/unix.rs index cadcfddb0f7f8..47d9ee226653e 100644 --- a/std/src/sys/fs/unix.rs +++ b/std/src/sys/fs/unix.rs @@ -2302,7 +2302,7 @@ mod cfm { } } #[cfg(any(target_os = "linux", target_os = "android"))] -pub(crate) use cfm::CachedFileMetadata; +pub(in crate::sys) use cfm::CachedFileMetadata; #[cfg(not(target_vendor = "apple"))] pub fn copy(from: &Path, to: &Path) -> io::Result { diff --git a/std/src/sys/pal/unix/kernel_copy.rs b/std/src/sys/io/kernel_copy/linux.rs similarity index 97% rename from std/src/sys/pal/unix/kernel_copy.rs rename to std/src/sys/io/kernel_copy/linux.rs index b984afa149d06..1c00d317f2a52 100644 --- a/std/src/sys/pal/unix/kernel_copy.rs +++ b/std/src/sys/io/kernel_copy/linux.rs @@ -48,9 +48,9 @@ use libc::sendfile as sendfile64; use libc::sendfile64; use libc::{EBADF, EINVAL, ENOSYS, EOPNOTSUPP, EOVERFLOW, EPERM, EXDEV}; +use super::CopyState; use crate::cmp::min; use crate::fs::{File, Metadata}; -use crate::io::copy::generic_copy; use crate::io::{ BufRead, BufReader, BufWriter, Error, PipeReader, PipeWriter, Read, Result, StderrLock, StdinLock, StdoutLock, Take, Write, @@ -70,10 +70,10 @@ use crate::sys::weak::syscall; #[cfg(test)] mod tests; -pub(crate) fn copy_spec( +pub fn kernel_copy( read: &mut R, write: &mut W, -) -> Result { +) -> Result { let copier = Copier { read, write }; SpecCopy::copy(copier) } @@ -176,17 +176,17 @@ struct Copier<'a, 'b, R: Read + ?Sized, W: Write + ?Sized> { } trait SpecCopy { - fn copy(self) -> Result; + fn copy(self) -> Result; } impl SpecCopy for Copier<'_, '_, R, W> { - default fn copy(self) -> Result { - generic_copy(self.read, self.write) + default fn copy(self) -> Result { + Ok(CopyState::Fallback(0)) } } impl SpecCopy for Copier<'_, '_, R, W> { - fn copy(self) -> Result { + fn copy(self) -> Result { let (reader, writer) = (self.read, self.write); let r_cfg = reader.properties(); let w_cfg = writer.properties(); @@ -214,7 +214,9 @@ impl SpecCopy for Copier<'_, '_, R, W> { result.update_take(reader); match result { - CopyResult::Ended(bytes_copied) => return Ok(bytes_copied + written), + CopyResult::Ended(bytes_copied) => { + return Ok(CopyState::Ended(bytes_copied + 
written)); + } CopyResult::Error(e, _) => return Err(e), CopyResult::Fallback(bytes) => written += bytes, } @@ -231,7 +233,9 @@ impl SpecCopy for Copier<'_, '_, R, W> { result.update_take(reader); match result { - CopyResult::Ended(bytes_copied) => return Ok(bytes_copied + written), + CopyResult::Ended(bytes_copied) => { + return Ok(CopyState::Ended(bytes_copied + written)); + } CopyResult::Error(e, _) => return Err(e), CopyResult::Fallback(bytes) => written += bytes, } @@ -244,7 +248,9 @@ impl SpecCopy for Copier<'_, '_, R, W> { result.update_take(reader); match result { - CopyResult::Ended(bytes_copied) => return Ok(bytes_copied + written), + CopyResult::Ended(bytes_copied) => { + return Ok(CopyState::Ended(bytes_copied + written)); + } CopyResult::Error(e, _) => return Err(e), CopyResult::Fallback(0) => { /* use the fallback below */ } CopyResult::Fallback(_) => { @@ -255,10 +261,7 @@ impl SpecCopy for Copier<'_, '_, R, W> { } // fallback if none of the more specialized syscalls wants to work with these file descriptors - match generic_copy(reader, writer) { - Ok(bytes) => Ok(bytes + written), - err => err, - } + Ok(CopyState::Fallback(written)) } } @@ -558,7 +561,7 @@ fn fd_to_meta(fd: &T) -> FdMeta { } } -pub(super) enum CopyResult { +enum CopyResult { Ended(u64), Error(Error, u64), Fallback(u64), @@ -587,7 +590,7 @@ const INVALID_FD: RawFd = -1; /// Callers must handle fallback to a generic copy loop. /// `Fallback` may indicate non-zero number of bytes already written /// if one of the files' cursor +`max_len` would exceed u64::MAX (`EOVERFLOW`). -pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> CopyResult { +fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> CopyResult { use crate::cmp; const NOT_PROBED: u8 = 0; diff --git a/std/src/sys/pal/unix/kernel_copy/tests.rs b/std/src/sys/io/kernel_copy/linux/tests.rs similarity index 100% rename from std/src/sys/pal/unix/kernel_copy/tests.rs rename to std/src/sys/io/kernel_copy/linux/tests.rs diff --git a/std/src/sys/io/kernel_copy/mod.rs b/std/src/sys/io/kernel_copy/mod.rs new file mode 100644 index 0000000000000..a89279412cf7f --- /dev/null +++ b/std/src/sys/io/kernel_copy/mod.rs @@ -0,0 +1,23 @@ +pub enum CopyState { + #[cfg_attr(not(any(target_os = "linux", target_os = "android")), expect(dead_code))] + Ended(u64), + Fallback(u64), +} + +cfg_select! 
{ + any(target_os = "linux", target_os = "android") => {
+ mod linux;
+ pub use linux::kernel_copy;
+ }
+ _ => {
+ use crate::io::{Result, Read, Write};
+
+ pub fn kernel_copy(_reader: &mut R, _writer: &mut W) -> Result
+ where
+ R: Read,
+ W: Write,
+ {
+ Ok(CopyState::Fallback(0))
+ }
+ }
+}
diff --git a/std/src/sys/io/mod.rs b/std/src/sys/io/mod.rs index 0916eda1c06a5..e2c5e7f88d492 100644 --- a/std/src/sys/io/mod.rs +++ b/std/src/sys/io/mod.rs
@@ -50,8 +50,11 @@ mod is_terminal { } }
+mod kernel_copy;
+
pub use io_slice::{IoSlice, IoSliceMut};
pub use is_terminal::is_terminal;
+pub use kernel_copy::{CopyState, kernel_copy};
// Bare metal platforms usually have very small amounts of RAM // (in the order of hundreds of KB)
diff --git a/std/src/sys/pal/unix/mod.rs b/std/src/sys/pal/unix/mod.rs index 9d303b8d65b39..81533d593131b 100644 --- a/std/src/sys/pal/unix/mod.rs +++ b/std/src/sys/pal/unix/mod.rs
@@ -5,8 +5,6 @@ use crate::io::ErrorKind;
#[cfg(target_os = "fuchsia")] pub mod fuchsia;
pub mod futex;
-#[cfg(any(target_os = "linux", target_os = "android"))]
-pub mod kernel_copy;
#[cfg(target_os = "linux")] pub mod linux;
pub mod os;
From 6fcadb10607e354cf0e88c106cb5e77bf176dbfb Mon Sep 17 00:00:00 2001 From: Jeremy Smart Date: Mon, 29 Sep 2025 23:45:19 -0400 Subject: [PATCH 350/358] recommend using a HashMap if a HashSet's second generic parameter doesn't implement BuildHasher
--- core/src/hash/mod.rs | 1 + 1 file changed, 1 insertion(+)
diff --git a/core/src/hash/mod.rs b/core/src/hash/mod.rs index a10c85640bbb6..c3f3cd7294254 100644 --- a/core/src/hash/mod.rs +++ b/core/src/hash/mod.rs
@@ -633,6 +633,7 @@ impl Hasher for &mut H {
/// /// [`build_hasher`]: BuildHasher::build_hasher /// [`HashMap`]: ../../std/collections/struct.HashMap.html
+#[cfg_attr(not(test), rustc_diagnostic_item = "BuildHasher")]
#[stable(since = "1.7.0", feature = "build_hasher")]
pub trait BuildHasher { /// Type of the hasher that will be created.
From e86644212690ff71349c899cdeec328f79ac45c6 Mon Sep 17 00:00:00 2001 From: Matthieu M Date: Sun, 19 Oct 2025 17:12:53 +0200 Subject: [PATCH 351/358] Improve the documentation of atomic::fence

Attempt to "fix" two flaws of the current documentation:

1. The over-emphasis of fence - fence synchronization, relegating atomic - fence and fence - atomic synchronization to playing second fiddle.

2. The lack of explanation as to how to properly perform atomic - fence and fence - atomic synchronization.

It does so by first making it clear that there are 3 different ways to use an atomic fence, then presenting a full example for each use case, noting the particular position of the fence with regard to the atomic operation, and rounding out with general notes.
--- core/src/sync/atomic.rs | 102 ++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 15 deletions(-)
diff --git a/core/src/sync/atomic.rs b/core/src/sync/atomic.rs index a3ceac89ef128..d7eaaf35b53a3 100644 --- a/core/src/sync/atomic.rs +++ b/core/src/sync/atomic.rs
@@ -4289,11 +4289,81 @@ unsafe fn atomic_umin(dst: *mut T, val: T, order: Ordering) -> T {
/// threads. To achieve this, a fence prevents the compiler and CPU from reordering certain types of
/// memory operations around it.
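The fence - fence pattern that the hunk below documents can also be exercised as a short, runnable program; in this sketch (the statics and the value `42` are illustrative choices, not taken from the patch), the spawned thread plays the role of A/X and the main thread plays Y/B:

```rust
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering, fence};
use std::thread;

static DATA: AtomicU32 = AtomicU32::new(0);
static READY: AtomicBool = AtomicBool::new(false);

fn main() {
    let t = thread::spawn(|| {
        DATA.store(42, Ordering::Relaxed);
        fence(Ordering::Release); // A
        READY.store(true, Ordering::Relaxed); // X
    });
    while !READY.load(Ordering::Relaxed) {} // Y: spin until the flag flips
    fence(Ordering::Acquire); // B: A happens-before B once Y observes X
    assert_eq!(DATA.load(Ordering::Relaxed), 42);
    t.join().unwrap();
}
```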
///
-/// A fence 'A' which has (at least) [`Release`] ordering semantics, synchronizes
-/// with a fence 'B' with (at least) [`Acquire`] semantics, if and only if there
-/// exist operations X and Y, both operating on some atomic object 'm' such
-/// that A is sequenced before X, Y is sequenced before B and Y observes
-/// the change to m. This provides a happens-before dependence between A and B.
+/// There are 3 different ways to use an atomic fence:
+///
+/// - atomic - fence synchronization: an atomic operation with (at least) [`Release`] ordering
+/// semantics synchronizes with a fence with (at least) [`Acquire`] ordering semantics.
+/// - fence - atomic synchronization: a fence with (at least) [`Release`] ordering semantics
+/// synchronizes with an atomic operation with (at least) [`Acquire`] ordering semantics.
+/// - fence - fence synchronization: a fence with (at least) [`Release`] ordering semantics
+/// synchronizes with a fence with (at least) [`Acquire`] ordering semantics.
+///
+/// These 3 ways complement the regular, fence-less, atomic - atomic synchronization.
+///
+/// ## Atomic - Fence
+///
+/// An atomic operation on one thread will synchronize with a fence on another thread when:
+///
+/// - on thread 1:
+/// - an atomic operation 'X' with (at least) [`Release`] ordering semantics on some atomic
+/// object 'm',
+///
+/// - is paired on thread 2 with:
+/// - an atomic read 'Y' with any order on 'm',
+/// - followed by a fence 'B' with (at least) [`Acquire`] ordering semantics.
+///
+/// This provides a happens-before dependence between X and B.
+///
+/// ```text
+/// Thread 1 Thread 2
+///
+/// m.store(3, Release); X ---------
+/// |
+/// |
+/// -------------> Y if m.load(Relaxed) == 3 {
+/// B fence(Acquire);
+/// ...
+/// }
+/// ```
+///
+/// ## Fence - Atomic
+///
+/// A fence on one thread will synchronize with an atomic operation on another thread when:
+///
+/// - on thread 1:
+/// - a fence 'A' with (at least) [`Release`] ordering semantics,
+/// - followed by an atomic write 'X' with any ordering on some atomic object 'm',
+///
+/// - is paired on thread 2 with:
+/// - an atomic operation 'Y' with (at least) [`Acquire`] ordering semantics.
+///
+/// This provides a happens-before dependence between A and Y.
+///
+/// ```text
+/// Thread 1 Thread 2
+///
+/// fence(Release); A
+/// m.store(3, Relaxed); X ---------
+/// |
+/// |
+/// -------------> Y if m.load(Acquire) == 3 {
+/// ...
+/// }
+/// ```
+///
+/// ## Fence - Fence
+///
+/// A fence on one thread will synchronize with a fence on another thread when:
+///
+/// - on thread 1:
+/// - a fence 'A' which has (at least) [`Release`] ordering semantics,
+/// - followed by an atomic write 'X' with any ordering on some atomic object 'm',
+///
+/// - is paired on thread 2 with:
+/// - an atomic read 'Y' with any ordering on 'm',
+/// - followed by a fence 'B' with (at least) [`Acquire`] ordering semantics.
+///
+/// This provides a happens-before dependence between A and B.
///
/// ```text
/// Thread 1 Thread 2
@@ -4308,18 +4378,20 @@ unsafe fn atomic_umin(dst: *mut T, val: T, order: Ordering) -> T {
/// }
/// ```
///
-/// Note that in the example above, it is crucial that the accesses to `m` are atomic. Fences cannot
-/// be used to establish synchronization among non-atomic accesses in different threads. However,
-/// thanks to the happens-before relationship between A and B, any non-atomic accesses that
-/// happen-before A are now also properly synchronized with any non-atomic accesses that
-/// happen-after B.
+/// ## Mandatory Atomic
+///
+/// Note that in the examples above, it is crucial that the accesses to `m` are atomic. Fences cannot
+/// be used to establish synchronization between non-atomic accesses in different threads. However,
+/// thanks to the happens-before relationship, any non-atomic accesses that happen-before the atomic
+/// operation or fence with (at least) [`Release`] ordering semantics are now also properly
+/// synchronized with any non-atomic accesses that happen-after the atomic operation or fence with
+/// (at least) [`Acquire`] ordering semantics.
///
-/// Atomic operations with [`Release`] or [`Acquire`] semantics can also synchronize
-/// with a fence.
+/// ## Memory Ordering
///
-/// A fence which has [`SeqCst`] ordering, in addition to having both [`Acquire`]
-/// and [`Release`] semantics, participates in the global program order of the
-/// other [`SeqCst`] operations and/or fences.
+/// A fence which has [`SeqCst`] ordering, in addition to having both [`Acquire`] and [`Release`]
+/// semantics, participates in the global program order of the other [`SeqCst`] operations and/or
+/// fences.
///
/// Accepts [`Acquire`], [`Release`], [`AcqRel`] and [`SeqCst`] orderings.
///
From 8eaad0cddd1873a10b7c57e9417586db2e973525 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Mon, 7 Aug 2023 09:28:27 -0400 Subject: [PATCH 352/358] Match ::fmt to that of str

819247f1 changed ::fmt such that it does not escape single quotes, but neglected to apply the same choice to OsString. This commit does that.
--- core/src/str/lossy.rs | 7 ++++++- core/src/wtf8.rs | 16 +++++++++++----- coretests/tests/str_lossy.rs | 1 + std/src/ffi/os_str/tests.rs | 6 ++++++ 4 files changed, 24 insertions(+), 6 deletions(-)
diff --git a/core/src/str/lossy.rs b/core/src/str/lossy.rs index 8d4210c80827d..d2dc650910f63 100644 --- a/core/src/str/lossy.rs +++ b/core/src/str/lossy.rs
@@ -1,3 +1,4 @@
+use super::char::EscapeDebugExtArgs;
use super::from_utf8_unchecked;
use super::validations::utf8_char_width;
use crate::fmt;
@@ -121,7 +122,11 @@ impl fmt::Debug for Debug<'_> {
let valid = chunk.valid();
let mut from = 0;
for (i, c) in valid.char_indices() {
- let esc = c.escape_debug();
+ let esc = c.escape_debug_ext(EscapeDebugExtArgs {
+ escape_grapheme_extended: true,
+ escape_single_quote: false,
+ escape_double_quote: true,
+ });
// If char needs escaping, flush backlog so far and write, else skip
if esc.len() != 1 {
f.write_str(&valid[from..i])?;
diff --git a/core/src/wtf8.rs b/core/src/wtf8.rs index b64fccedc19cd..7214918db6c39 100644 --- a/core/src/wtf8.rs +++ b/core/src/wtf8.rs
@@ -19,7 +19,7 @@
// implementations, so, we'll have to add more doc(hidden)s anyway
#![doc(hidden)]
-use crate::char::encode_utf16_raw;
+use crate::char::{EscapeDebugExtArgs, encode_utf16_raw};
use crate::clone::CloneToUninit;
use crate::fmt::{self, Write};
use crate::hash::{Hash, Hasher};
@@ -144,14 +144,20 @@ impl AsRef<[u8]> for Wtf8 {
impl fmt::Debug for Wtf8 {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
- use crate::fmt::Write;
- for c in s.chars().flat_map(|c| c.escape_debug()) {
+ use crate::fmt::Write as _;
+ for c in s.chars().flat_map(|c| {
+ c.escape_debug_ext(EscapeDebugExtArgs {
+ escape_grapheme_extended:
true, + escape_single_quote: false, + escape_double_quote: true, + }) + }) { f.write_char(c)? } Ok(()) } - formatter.write_str("\"")?; + formatter.write_char('"')?; let mut pos = 0; while let Some((surrogate_pos, surrogate)) = self.next_surrogate(pos) { // SAFETY: next_surrogate provides an index for a range of valid UTF-8 bytes. @@ -164,7 +170,7 @@ impl fmt::Debug for Wtf8 { // SAFETY: after next_surrogate returns None, the remainder is valid UTF-8. write_str_escaped(formatter, unsafe { str::from_utf8_unchecked(&self.bytes[pos..]) })?; - formatter.write_str("\"") + formatter.write_char('"') } } diff --git a/coretests/tests/str_lossy.rs b/coretests/tests/str_lossy.rs index 6e70ea3e28574..820da38dd7466 100644 --- a/coretests/tests/str_lossy.rs +++ b/coretests/tests/str_lossy.rs @@ -80,4 +80,5 @@ fn debug() { b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa".utf8_chunks().debug(), ), ); + assert_eq!("\"'\"", &format!("{:?}", b"'".utf8_chunks().debug())); } diff --git a/std/src/ffi/os_str/tests.rs b/std/src/ffi/os_str/tests.rs index 2572b71fd9ac6..3474f0ab50684 100644 --- a/std/src/ffi/os_str/tests.rs +++ b/std/src/ffi/os_str/tests.rs @@ -303,3 +303,9 @@ fn clone_to_uninit() { unsafe { a.clone_to_uninit(ptr::from_mut::(&mut b).cast()) }; assert_eq!(a, &*b); } + +#[test] +fn debug() { + let s = "'single quotes'"; + assert_eq!(format!("{:?}", OsStr::new(s)), format!("{:?}", s)); +} From ce4c3ef7592e300bd44a80bb0de6beeb73037ce9 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 13 Aug 2023 10:04:42 -0400 Subject: [PATCH 353/358] Remove workaround --- std/src/env.rs | 2 +- std/src/sys/env/common.rs | 17 ----------------- std/src/sys/env/unsupported.rs | 7 ------- std/src/sys/env/windows.rs | 24 ------------------------ 4 files changed, 1 insertion(+), 49 deletions(-) diff --git a/std/src/env.rs b/std/src/env.rs index 6d716bd854433..fd662e8a663a9 100644 --- a/std/src/env.rs +++ b/std/src/env.rs @@ -170,7 +170,7 @@ impl Iterator for Vars { impl fmt::Debug for Vars { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let Self { inner: VarsOs { inner } } = self; - f.debug_struct("Vars").field("inner", &inner.str_debug()).finish() + f.debug_struct("Vars").field("inner", inner).finish() } } diff --git a/std/src/sys/env/common.rs b/std/src/sys/env/common.rs index f161ff073f3d5..87e86e2947fad 100644 --- a/std/src/sys/env/common.rs +++ b/std/src/sys/env/common.rs @@ -5,27 +5,10 @@ pub struct Env { iter: vec::IntoIter<(OsString, OsString)>, } -// FIXME(https://github.com/rust-lang/rust/issues/114583): Remove this when ::fmt matches ::fmt. -pub struct EnvStrDebug<'a> { - slice: &'a [(OsString, OsString)], -} - -impl fmt::Debug for EnvStrDebug<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list() - .entries(self.slice.iter().map(|(a, b)| (a.to_str().unwrap(), b.to_str().unwrap()))) - .finish() - } -} - impl Env { pub(super) fn new(env: Vec<(OsString, OsString)>) -> Self { Env { iter: env.into_iter() } } - - pub fn str_debug(&self) -> impl fmt::Debug + '_ { - EnvStrDebug { slice: self.iter.as_slice() } - } } impl fmt::Debug for Env { diff --git a/std/src/sys/env/unsupported.rs b/std/src/sys/env/unsupported.rs index 98905e6482747..a967ace95f02a 100644 --- a/std/src/sys/env/unsupported.rs +++ b/std/src/sys/env/unsupported.rs @@ -3,13 +3,6 @@ use crate::{fmt, io}; pub struct Env(!); -impl Env { - // FIXME(https://github.com/rust-lang/rust/issues/114583): Remove this when ::fmt matches ::fmt. 
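On a toolchain that includes this change, the behavior being matched is easiest to see side by side with `str` and `char` (a small sketch; the sample strings are arbitrary):

```rust
use std::ffi::OsStr;

fn main() {
    // `str` stopped escaping single quotes in its Debug output (819247f1)...
    assert_eq!(format!("{:?}", "it's"), r#""it's""#);
    // ...and `OsStr` now matches that choice.
    assert_eq!(format!("{:?}", OsStr::new("it's")), r#""it's""#);
    // `char` still escapes the quote, since `'` is its own delimiter.
    assert_eq!(format!("{:?}", '\''), r"'\''");
}
```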
- pub fn str_debug(&self) -> impl fmt::Debug + '_ { - self.0 - } -} - impl fmt::Debug for Env { fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result { self.0 diff --git a/std/src/sys/env/windows.rs b/std/src/sys/env/windows.rs index 3c4d4a84cfd6b..219fcc4fb43f9 100644 --- a/std/src/sys/env/windows.rs +++ b/std/src/sys/env/windows.rs @@ -8,30 +8,6 @@ pub struct Env { iter: EnvIterator, } -// FIXME(https://github.com/rust-lang/rust/issues/114583): Remove this when ::fmt matches ::fmt. -pub struct EnvStrDebug<'a> { - iter: &'a EnvIterator, -} - -impl fmt::Debug for EnvStrDebug<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { iter } = self; - let iter: EnvIterator = (*iter).clone(); - let mut list = f.debug_list(); - for (a, b) in iter { - list.entry(&(a.to_str().unwrap(), b.to_str().unwrap())); - } - list.finish() - } -} - -impl Env { - pub fn str_debug(&self) -> impl fmt::Debug + '_ { - let Self { base: _, iter } = self; - EnvStrDebug { iter } - } -} - impl fmt::Debug for Env { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let Self { base: _, iter } = self; From ed772fb0bf2f1d305458dc619893aea8b884d17a Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Sat, 25 Oct 2025 04:15:43 +0000 Subject: [PATCH 354/358] Replace OffsetOf by an actual sum. --- core/src/intrinsics/mod.rs | 17 +++++++++++++++++ core/src/mem/mod.rs | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/core/src/intrinsics/mod.rs b/core/src/intrinsics/mod.rs index c987d80be8b42..18d8b9880dfac 100644 --- a/core/src/intrinsics/mod.rs +++ b/core/src/intrinsics/mod.rs @@ -2791,6 +2791,23 @@ pub const fn size_of() -> usize; #[rustc_intrinsic] pub const fn align_of() -> usize; +/// The offset of a field inside a type. +/// +/// Note that, unlike most intrinsics, this is safe to call; +/// it does not require an `unsafe` block. +/// Therefore, implementations must not require the user to uphold +/// any safety invariants. +/// +/// The stabilized version of this intrinsic is [`core::mem::offset_of`]. +#[rustc_nounwind] +#[unstable(feature = "core_intrinsics", issue = "none")] +#[rustc_const_unstable(feature = "core_intrinsics", issue = "none")] +#[rustc_const_stable_indirect] +#[rustc_intrinsic_const_stable_indirect] +#[rustc_intrinsic] +#[lang = "offset_of"] +pub const fn offset_of(variant: u32, field: u32) -> usize; + /// Returns the number of variants of the type `T` cast to a `usize`; /// if `T` has no variants, returns `0`. Uninhabited variants will be counted. /// diff --git a/core/src/mem/mod.rs b/core/src/mem/mod.rs index 844030119c44d..7d9d7f3f586cf 100644 --- a/core/src/mem/mod.rs +++ b/core/src/mem/mod.rs @@ -1424,10 +1424,10 @@ impl SizedTypeProperties for T {} /// [`offset_of_enum`]: https://doc.rust-lang.org/nightly/unstable-book/language-features/offset-of-enum.html /// [`offset_of_slice`]: https://doc.rust-lang.org/nightly/unstable-book/language-features/offset-of-slice.html #[stable(feature = "offset_of", since = "1.77.0")] -#[allow_internal_unstable(builtin_syntax)] +#[allow_internal_unstable(builtin_syntax, core_intrinsics)] pub macro offset_of($Container:ty, $($fields:expr)+ $(,)?) { // The `{}` is for better error messages - {builtin # offset_of($Container, $($fields)+)} + const {builtin # offset_of($Container, $($fields)+)} } /// Create a fresh instance of the inhabited ZST type `T`. 
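With the macro now expanding to an inline const, `offset_of!` is guaranteed to be evaluated at compile time and can initialize other constants directly. A stable-Rust sketch of that contract, with `Packet` as a hypothetical `repr(C)` type (not from the patch):

```rust
use std::mem::offset_of;

#[repr(C)]
struct Packet {
    tag: u8,
    len: u16,
    payload: u32,
}

fn main() {
    // The expansion is an inline const, so it can feed other constants.
    const PAYLOAD_OFFSET: usize = offset_of!(Packet, payload);
    assert_eq!(offset_of!(Packet, tag), 0);
    assert_eq!(offset_of!(Packet, len), 2); // u16 aligned to 2 after the u8
    assert_eq!(PAYLOAD_OFFSET, 4); // u32 aligned to 4
}
```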
From 0621a12d48c7b56ada078d1a6f31c7b75f9f7145 Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Sat, 1 Nov 2025 23:18:21 +0000 Subject: [PATCH 355/358] Honor allow_internal_unstable for const intrinsics. --- core/src/intrinsics/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/intrinsics/mod.rs b/core/src/intrinsics/mod.rs index 18d8b9880dfac..374c749c2da07 100644 --- a/core/src/intrinsics/mod.rs +++ b/core/src/intrinsics/mod.rs @@ -2802,7 +2802,6 @@ pub const fn align_of() -> usize; #[rustc_nounwind] #[unstable(feature = "core_intrinsics", issue = "none")] #[rustc_const_unstable(feature = "core_intrinsics", issue = "none")] -#[rustc_const_stable_indirect] #[rustc_intrinsic_const_stable_indirect] #[rustc_intrinsic] #[lang = "offset_of"] From c3a864fb64753d9b592e5b49e0df63a22a21bd77 Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Thu, 6 Nov 2025 23:48:12 +0000 Subject: [PATCH 356/358] Complete doc. --- core/src/intrinsics/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/src/intrinsics/mod.rs b/core/src/intrinsics/mod.rs index 374c749c2da07..34031d7775aca 100644 --- a/core/src/intrinsics/mod.rs +++ b/core/src/intrinsics/mod.rs @@ -2798,7 +2798,11 @@ pub const fn align_of() -> usize; /// Therefore, implementations must not require the user to uphold /// any safety invariants. /// +/// This intrinsic can only be evaluated at compile-time, and should only appear in +/// constants or inline const blocks. +/// /// The stabilized version of this intrinsic is [`core::mem::offset_of`]. +/// This intrinsic is also a lang item so `offset_of!` can desugar to calls to it. #[rustc_nounwind] #[unstable(feature = "core_intrinsics", issue = "none")] #[rustc_const_unstable(feature = "core_intrinsics", issue = "none")] From c6bbdfe877cbb1dba1ccc60cdcb85fb4cde1d53f Mon Sep 17 00:00:00 2001 From: Ayush Singh Date: Sat, 15 Nov 2025 15:07:11 +0530 Subject: [PATCH 357/358] std: sys: fs: uefi: Implement stat - Implement std::fs::metadata function. 
- Tested on qemu ovmf Signed-off-by: Ayush Singh --- std/src/sys/fs/uefi.rs | 56 +++++++++++++++++++++++++++++---- std/src/sys/pal/uefi/helpers.rs | 4 +++ 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/std/src/sys/fs/uefi.rs b/std/src/sys/fs/uefi.rs index fc5f159ec188e..18c1501a655fe 100644 --- a/std/src/sys/fs/uefi.rs +++ b/std/src/sys/fs/uefi.rs @@ -7,7 +7,7 @@ use crate::hash::Hash; use crate::io::{self, BorrowedCursor, IoSlice, IoSliceMut, SeekFrom}; use crate::path::{Path, PathBuf}; use crate::sys::time::SystemTime; -use crate::sys::unsupported; +use crate::sys::{helpers, unsupported}; #[expect(dead_code)] const FILE_PERMISSIONS_MASK: u64 = r_efi::protocols::file::READ_ONLY; @@ -76,6 +76,18 @@ impl FileAttr { pub fn created(&self) -> io::Result { Ok(self.created) } + + fn from_uefi(info: helpers::UefiBox) -> Self { + unsafe { + Self { + attr: (*info.as_ptr()).attribute, + size: (*info.as_ptr()).size, + modified: uefi_fs::uefi_to_systemtime((*info.as_ptr()).modification_time), + accessed: uefi_fs::uefi_to_systemtime((*info.as_ptr()).last_access_time), + created: uefi_fs::uefi_to_systemtime((*info.as_ptr()).create_time), + } + } + } } impl FilePermissions { @@ -381,8 +393,10 @@ pub fn link(_src: &Path, _dst: &Path) -> io::Result<()> { unsupported() } -pub fn stat(_p: &Path) -> io::Result { - unsupported() +pub fn stat(p: &Path) -> io::Result { + let f = uefi_fs::File::from_path(p, r_efi::protocols::file::MODE_READ, 0)?; + let inf = f.file_info()?; + Ok(FileAttr::from_uefi(inf)) } pub fn lstat(p: &Path) -> io::Result { @@ -404,7 +418,7 @@ mod uefi_fs { use crate::io; use crate::path::Path; use crate::ptr::NonNull; - use crate::sys::helpers; + use crate::sys::helpers::{self, UefiBox}; use crate::sys::time::{self, SystemTime}; pub(crate) struct File(NonNull); @@ -492,6 +506,37 @@ mod uefi_fs { let p = NonNull::new(file_opened).unwrap(); Ok(File(p)) } + + pub(crate) fn file_info(&self) -> io::Result> { + let file_ptr = self.0.as_ptr(); + let mut info_id = file::INFO_ID; + let mut buf_size = 0; + + let r = unsafe { + ((*file_ptr).get_info)( + file_ptr, + &mut info_id, + &mut buf_size, + crate::ptr::null_mut(), + ) + }; + assert!(r.is_error()); + if r != r_efi::efi::Status::BUFFER_TOO_SMALL { + return Err(io::Error::from_raw_os_error(r.as_usize())); + } + + let mut info: UefiBox = UefiBox::new(buf_size)?; + let r = unsafe { + ((*file_ptr).get_info)( + file_ptr, + &mut info_id, + &mut buf_size, + info.as_mut_ptr().cast(), + ) + }; + + if r.is_error() { Err(io::Error::from_raw_os_error(r.as_usize())) } else { Ok(info) } + } } impl Drop for File { @@ -556,8 +601,7 @@ mod uefi_fs { /// EDK2 FAT driver uses EFI_UNSPECIFIED_TIMEZONE to represent localtime. So for proper /// conversion to SystemTime, we use the current time to get the timezone in such cases. 
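The `file_info` method above follows the usual UEFI size-probe protocol: call `get_info` once with an empty buffer, read the required size out of the `BUFFER_TOO_SMALL` failure, allocate, and call again. A self-contained sketch of that calling pattern against a mock API (all names here are illustrative stand-ins, not the real EFI bindings):

```rust
// Mock of an EFI-style getter: fills `buf` if it is large enough, otherwise
// reports the required size (standing in for EFI_BUFFER_TOO_SMALL).
const INFO: &[u8] = b"file info blob";

fn get_info(buf_size: &mut usize, buf: &mut [u8]) -> Result<(), usize> {
    if *buf_size < INFO.len() {
        *buf_size = INFO.len(); // the firmware reports how much is needed
        return Err(*buf_size);
    }
    buf[..INFO.len()].copy_from_slice(INFO);
    Ok(())
}

fn main() {
    // First call: zero-sized probe, expected to fail with the required size.
    let mut size = 0;
    let needed = get_info(&mut size, &mut []).unwrap_err();
    // Second call: retry with a buffer of exactly the reported size.
    let mut buf = vec![0u8; needed];
    get_info(&mut size, &mut buf).unwrap();
    assert_eq!(buf, INFO);
}
```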
- #[expect(dead_code)] - fn uefi_to_systemtime(mut time: r_efi::efi::Time) -> SystemTime { + pub(crate) fn uefi_to_systemtime(mut time: r_efi::efi::Time) -> SystemTime { time.timezone = if time.timezone == r_efi::efi::UNSPECIFIED_TIMEZONE { time::system_time_internal::now().unwrap().timezone } else { diff --git a/std/src/sys/pal/uefi/helpers.rs b/std/src/sys/pal/uefi/helpers.rs index 852e0d6b051bd..bfad6491e3219 100644 --- a/std/src/sys/pal/uefi/helpers.rs +++ b/std/src/sys/pal/uefi/helpers.rs @@ -798,6 +798,10 @@ impl UefiBox { pub(crate) fn as_mut_ptr(&mut self) -> *mut T { self.inner.as_ptr().cast() } + + pub(crate) fn as_ptr(&self) -> *const T { + self.inner.as_ptr().cast() + } } impl Drop for UefiBox { From 6579c5406f5d75fea620d988055a3f8348fff942 Mon Sep 17 00:00:00 2001 From: Shun Sakai Date: Wed, 19 Nov 2025 03:17:38 +0900 Subject: [PATCH 358/358] feat: Change return type of `NonZero::bit_width` Return `NonZero` instead of `u32`. --- core/src/num/nonzero.rs | 11 +++++++---- coretests/tests/nonzero.rs | 8 ++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/core/src/num/nonzero.rs b/core/src/num/nonzero.rs index baabf1d90b88a..30c3fb8b5c52f 100644 --- a/core/src/num/nonzero.rs +++ b/core/src/num/nonzero.rs @@ -1789,8 +1789,9 @@ macro_rules! nonzero_integer_signedness_dependent_methods { /// # /// # fn main() { test().unwrap(); } /// # fn test() -> Option<()> { - #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0b111)?.bit_width(), 3);")] - #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0b1110)?.bit_width(), 4);")] + #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::MIN.bit_width(), NonZero::new(1)?);")] + #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0b111)?.bit_width(), NonZero::new(3)?);")] + #[doc = concat!("assert_eq!(NonZero::<", stringify!($Int), ">::new(0b1110)?.bit_width(), NonZero::new(4)?);")] /// # Some(()) /// # } /// ``` @@ -1798,8 +1799,10 @@ macro_rules! nonzero_integer_signedness_dependent_methods { #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline(always)] - pub const fn bit_width(self) -> u32 { - Self::BITS - self.leading_zeros() + pub const fn bit_width(self) -> NonZero { + // SAFETY: Since `self.leading_zeros()` is always less than + // `Self::BITS`, this subtraction can never be zero. + unsafe { NonZero::new_unchecked(Self::BITS - self.leading_zeros()) } } }; diff --git a/coretests/tests/nonzero.rs b/coretests/tests/nonzero.rs index 4bcc61de8dcad..c368a2621740b 100644 --- a/coretests/tests/nonzero.rs +++ b/coretests/tests/nonzero.rs @@ -577,10 +577,10 @@ fn test_nonzero_bit_width() { ($($T:ty),+) => { $( { - assert_eq!(NonZero::<$T>::new(0b010_1100).unwrap().bit_width(), 6); - assert_eq!(NonZero::<$T>::new(0b111_1001).unwrap().bit_width(), 7); - assert_eq!(NonZero::<$T>::MIN.bit_width(), 1); - assert_eq!(NonZero::<$T>::MAX.bit_width(), <$T>::BITS); + assert_eq!(NonZero::<$T>::new(0b010_1100).unwrap().bit_width(), NonZero::new(6).unwrap()); + assert_eq!(NonZero::<$T>::new(0b111_1001).unwrap().bit_width(), NonZero::new(7).unwrap()); + assert_eq!(NonZero::<$T>::MIN.bit_width(), NonZero::new(1).unwrap()); + assert_eq!(NonZero::<$T>::MAX.bit_width(), NonZero::new(<$T>::BITS).unwrap()); } )+ };
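Returning `NonZero<u32>` keeps the "a non-zero value has at least one bit" guarantee in the type, so downstream arithmetic needs no zero checks. A nightly-only sketch of how a caller benefits (the feature-gate name below is assumed for illustration and may differ from the actual gate):

```rust
#![feature(uint_bit_width)] // assumed gate name; check the tracking issue
use std::num::NonZero;

fn main() {
    let n = NonZero::new(0b101_1010_u32).unwrap();
    // The highest set bit is bit 6, so the width is 7 -- provably non-zero.
    let width: NonZero<u32> = n.bit_width();
    assert_eq!(width.get(), 7);
    // Derived quantities need no zero handling, e.g. bytes needed to store `n`:
    assert_eq!(width.get().div_ceil(8), 1);
}
```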